{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 1: Load and examine superstore sales data from an Excel file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Superstore workbook; the path is relative to the notebook's working directory.\n",
    "excel_path = \"Sample - Superstore.xls\"\n",
    "df = pd.read_excel(excel_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Row ID</th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>Country</th>\n",
       "      <th>City</th>\n",
       "      <th>...</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>CA-2016-152156</td>\n",
       "      <td>2016-11-08</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>CG-12520</td>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Henderson</td>\n",
       "      <td>...</td>\n",
       "      <td>42420</td>\n",
       "      <td>South</td>\n",
       "      <td>FUR-BO-10001798</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Bookcases</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>261.9600</td>\n",
       "      <td>2</td>\n",
       "      <td>0.00</td>\n",
       "      <td>41.9136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>CA-2016-152156</td>\n",
       "      <td>2016-11-08</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>CG-12520</td>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Henderson</td>\n",
       "      <td>...</td>\n",
       "      <td>42420</td>\n",
       "      <td>South</td>\n",
       "      <td>FUR-CH-10000454</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Chairs</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>731.9400</td>\n",
       "      <td>3</td>\n",
       "      <td>0.00</td>\n",
       "      <td>219.5820</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>CA-2016-138688</td>\n",
       "      <td>2016-06-12</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>DV-13045</td>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90036</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-LA-10000240</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Labels</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>14.6200</td>\n",
       "      <td>2</td>\n",
       "      <td>0.00</td>\n",
       "      <td>6.8714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>US-2015-108966</td>\n",
       "      <td>2015-10-11</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>SO-20335</td>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Fort Lauderdale</td>\n",
       "      <td>...</td>\n",
       "      <td>33311</td>\n",
       "      <td>South</td>\n",
       "      <td>FUR-TA-10000577</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Tables</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>957.5775</td>\n",
       "      <td>5</td>\n",
       "      <td>0.45</td>\n",
       "      <td>-383.0310</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>US-2015-108966</td>\n",
       "      <td>2015-10-11</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>SO-20335</td>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Fort Lauderdale</td>\n",
       "      <td>...</td>\n",
       "      <td>33311</td>\n",
       "      <td>South</td>\n",
       "      <td>OFF-ST-10000760</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>22.3680</td>\n",
       "      <td>2</td>\n",
       "      <td>0.20</td>\n",
       "      <td>2.5164</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>FUR-FU-10001487</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Furnishings</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>48.8600</td>\n",
       "      <td>7</td>\n",
       "      <td>0.00</td>\n",
       "      <td>14.1694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AR-10002833</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Art</td>\n",
       "      <td>Newell 322</td>\n",
       "      <td>7.2800</td>\n",
       "      <td>4</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.9656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>TEC-PH-10002275</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Mitel 5320 IP Phone VoIP phone</td>\n",
       "      <td>907.1520</td>\n",
       "      <td>6</td>\n",
       "      <td>0.20</td>\n",
       "      <td>90.7152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-BI-10003910</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>DXL Angle-View Binders with Locking Rings by S...</td>\n",
       "      <td>18.5040</td>\n",
       "      <td>3</td>\n",
       "      <td>0.20</td>\n",
       "      <td>5.7825</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>CA-2014-115812</td>\n",
       "      <td>2014-06-09</td>\n",
       "      <td>2014-06-14</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>United States</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>...</td>\n",
       "      <td>90032</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AP-10002892</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Appliances</td>\n",
       "      <td>Belkin F5C206VTEL 6 Outlet Surge</td>\n",
       "      <td>114.9000</td>\n",
       "      <td>5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>34.4700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 21 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Row ID        Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "0       1  CA-2016-152156 2016-11-08 2016-11-11    Second Class    CG-12520   \n",
       "1       2  CA-2016-152156 2016-11-08 2016-11-11    Second Class    CG-12520   \n",
       "2       3  CA-2016-138688 2016-06-12 2016-06-16    Second Class    DV-13045   \n",
       "3       4  US-2015-108966 2015-10-11 2015-10-18  Standard Class    SO-20335   \n",
       "4       5  US-2015-108966 2015-10-11 2015-10-18  Standard Class    SO-20335   \n",
       "5       6  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "6       7  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "7       8  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "8       9  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "9      10  CA-2014-115812 2014-06-09 2014-06-14  Standard Class    BH-11710   \n",
       "\n",
       "     Customer Name    Segment        Country             City    ...     \\\n",
       "0      Claire Gute   Consumer  United States        Henderson    ...      \n",
       "1      Claire Gute   Consumer  United States        Henderson    ...      \n",
       "2  Darrin Van Huff  Corporate  United States      Los Angeles    ...      \n",
       "3   Sean O'Donnell   Consumer  United States  Fort Lauderdale    ...      \n",
       "4   Sean O'Donnell   Consumer  United States  Fort Lauderdale    ...      \n",
       "5  Brosina Hoffman   Consumer  United States      Los Angeles    ...      \n",
       "6  Brosina Hoffman   Consumer  United States      Los Angeles    ...      \n",
       "7  Brosina Hoffman   Consumer  United States      Los Angeles    ...      \n",
       "8  Brosina Hoffman   Consumer  United States      Los Angeles    ...      \n",
       "9  Brosina Hoffman   Consumer  United States      Los Angeles    ...      \n",
       "\n",
       "  Postal Code  Region       Product ID         Category Sub-Category  \\\n",
       "0       42420   South  FUR-BO-10001798        Furniture    Bookcases   \n",
       "1       42420   South  FUR-CH-10000454        Furniture       Chairs   \n",
       "2       90036    West  OFF-LA-10000240  Office Supplies       Labels   \n",
       "3       33311   South  FUR-TA-10000577        Furniture       Tables   \n",
       "4       33311   South  OFF-ST-10000760  Office Supplies      Storage   \n",
       "5       90032    West  FUR-FU-10001487        Furniture  Furnishings   \n",
       "6       90032    West  OFF-AR-10002833  Office Supplies          Art   \n",
       "7       90032    West  TEC-PH-10002275       Technology       Phones   \n",
       "8       90032    West  OFF-BI-10003910  Office Supplies      Binders   \n",
       "9       90032    West  OFF-AP-10002892  Office Supplies   Appliances   \n",
       "\n",
       "                                        Product Name     Sales  Quantity  \\\n",
       "0                  Bush Somerset Collection Bookcase  261.9600         2   \n",
       "1  Hon Deluxe Fabric Upholstered Stacking Chairs,...  731.9400         3   \n",
       "2  Self-Adhesive Address Labels for Typewriters b...   14.6200         2   \n",
       "3      Bretford CR4500 Series Slim Rectangular Table  957.5775         5   \n",
       "4                     Eldon Fold 'N Roll Cart System   22.3680         2   \n",
       "5  Eldon Expressions Wood and Plastic Desk Access...   48.8600         7   \n",
       "6                                         Newell 322    7.2800         4   \n",
       "7                     Mitel 5320 IP Phone VoIP phone  907.1520         6   \n",
       "8  DXL Angle-View Binders with Locking Rings by S...   18.5040         3   \n",
       "9                   Belkin F5C206VTEL 6 Outlet Surge  114.9000         5   \n",
       "\n",
       "   Discount    Profit  \n",
       "0      0.00   41.9136  \n",
       "1      0.00  219.5820  \n",
       "2      0.00    6.8714  \n",
       "3      0.45 -383.0310  \n",
       "4      0.20    2.5164  \n",
       "5      0.00   14.1694  \n",
       "6      0.00    1.9656  \n",
       "7      0.20   90.7152  \n",
       "8      0.20    5.7825  \n",
       "9      0.00   34.4700  \n",
       "\n",
       "[10 rows x 21 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first 10 rows to check the loaded columns and values.\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'Row ID' only duplicates the DataFrame index, so remove it.\n",
    "# Rebind instead of mutating in place, and tolerate an already-dropped column,\n",
    "# so that re-running this cell does not raise a KeyError.\n",
    "df = df.drop(columns='Row ID', errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9994, 20)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the DataFrame at this point in the notebook.\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 2: Subsetting the DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>City</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>48.860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>7.280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>907.152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>18.504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>BH-11710</td>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>90032</td>\n",
       "      <td>114.900</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Customer ID    Customer Name         City  Postal Code    Sales\n",
       "5    BH-11710  Brosina Hoffman  Los Angeles        90032   48.860\n",
       "6    BH-11710  Brosina Hoffman  Los Angeles        90032    7.280\n",
       "7    BH-11710  Brosina Hoffman  Los Angeles        90032  907.152\n",
       "8    BH-11710  Brosina Hoffman  Los Angeles        90032   18.504\n",
       "9    BH-11710  Brosina Hoffman  Los Angeles        90032  114.900"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df has the default integer index, and .loc slices are label-based and include\n",
    "# both endpoints, so 5:9 selects the rows labelled 5 through 9.\n",
    "df_subset = df.loc[5:9, ['Customer ID', 'Customer Name', 'City', 'Postal Code', 'Sales']]\n",
    "df_subset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 3: An example use case – determining statistics on sales and profit for records 100-199"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label-based slice on the default integer index: both endpoints are included,\n",
    "# giving the 100 rows labelled 100 through 199.\n",
    "df_subset = df.loc[100:199, ['Sales', 'Profit']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>262.957220</td>\n",
       "      <td>0.347574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>858.983762</td>\n",
       "      <td>170.744869</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.788000</td>\n",
       "      <td>-1359.992000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>21.327000</td>\n",
       "      <td>1.635900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>66.960000</td>\n",
       "      <td>9.653600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>177.095000</td>\n",
       "      <td>23.458800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>8159.952000</td>\n",
       "      <td>585.552000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Sales       Profit\n",
       "count   100.000000   100.000000\n",
       "mean    262.957220     0.347574\n",
       "std     858.983762   170.744869\n",
       "min       1.788000 -1359.992000\n",
       "25%      21.327000     1.635900\n",
       "50%      66.960000     9.653600\n",
       "75%     177.095000    23.458800\n",
       "max    8159.952000   585.552000"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, min, quartiles, max) for each column of the subset.\n",
    "df_subset.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEKCAYAAAAcgp5RAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAHWNJREFUeJzt3X10XPV95/H3x5YxJqYJLKB6sYlJcVPZSoKLIQ92iBWf8lBI7aYJtfKwzqJTb3fBodlsbBO1MaRH1N605IGEBRO5qFsi4uYJBy8Q4ko0agOEJARsiwefYBobgzEpxA9E2Oa7f9yfnPEgWTOyRiNffV7nzNHM7/7uvb+ZufOZq9/93TuKCMzMLL/GVLsBZmZWWQ56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQf9CCXpGklRcNsn6VFJi0dAu3aVOc9xab6zh7AddZJ+IGlven2mDtWy+1jXVkl/W6nlDxdJV0oaEeOpJc2X1C3pFUlbU1lIurKgzmJJC6rWyBypqXYD7IheAi5K918HvA+4WdKeiPha9ZpVtuOAFcBW4OEhWubngDcAfwTsBXYM0XKtwiSNBf4BuAv4M7L3D+CdwFMFVRcDG4HvDGsDc8hBP7IdiIj7Cx5vkPQuYAFwLAV9JfwesC4iNlS7IQaSJkTEyyVWnwT8FvC1iOjqLSza1m0Iuevm2LMbGFdYIOlMSd+R9CtJuyV9V9JZBdM/KOlVSfMKyqam+i0Fj0PShyT937ScnZJWDNSggdaf2gzw9wVdUVOPsLyzJW1I3VX/Iek2SbWF7QR+B/hEWlbnEZbVJGmzpJcl7ZJ0n6QZBdNXpi6xPZK2pXX9dgnP+d1pWfskvSDpFkknFkx/g6SvSnpG0q8l/bukWwZY5iWS7k2v+68k3S/pgqI616TnMTNN3yfpp5LeXVRvvKQvS3pR0i8lfZ6i7aafNvQuf7akn6S2PyxpTlG9rZL+TtJfSdoG/Kpg2mXpNe2R9AtJLZJq0rSPAb9IVe9I7981adqhrpv0np4DLCrYZj42UPutHxHh2wi8AdcAu8j+66oh2wP6CHAA+C8F9cYDPwceB/4U+BOyf3e3AycX1GsHnk7LEdAB/Aw4Lk2fCkSa72bgQqAFeBW4orhd5awfaEjL/mvgHek2vp/nfSrwIvBDsv9cPgJsAx4h6wIan+bfAdyW7k/vZ1nnA/uBq4G5ZN08fwO8q6DOrWkdc4EPpPVuBsYU1NkK/G3B49lAD/B14A+Bj6bn+42COmuAx9Jr8p60jtUDvOdXAp8g6677A+B64CAwu+j135dej/8KXAzcDzwPnFBQ7/PAr4FPpjrfSq9jlLDd7SPrQllM1l3YSfZl/dtFr8kO4PvpdX1/Kr8gvddt6XksTa/VTQXv7x+nOp9M79/kNC2AK9P96UA3sL5gmzm12p/LY/VW9Qb41s8bk33goo/bF4vq/TlZ+L+poGwy8ApwdUHZycAzQCvw8fThe1vB9Klp+d8rWv4tKcTGFLRrVznrByamZX+shOe9kizof6ug7O1p/saCsq0UhG8/y/pfwI/LeM3HAqendZ3f37qAHwAdRfO+N81Xnx5vBJYcxfs/huwL/h5gTR/bxXsLys5OZRelx/8JeBlYVrS8xygt6AP4UEHZROCXwMqi12QHcHzR/Pf38dosJfvC6g303m3t0qJ6h4I+PX4IuHW4PnN5vrnrZmR7CTg33eYAV5H9K1vYnXIe8JOI+HlvQURsA/41zdNb9kuyA1+Xkx3I/GxE/KyPdX676PG3gP9MFt59KWn9ZTiP7MvmUFdARDxAFizlLu9hYKakz0s6X9JxxRUkXSzp3yS9RPaFtS1N+t2+FijpBLKDhmsl1fTegC6y/x7OKVj3pyT9D0l9LquPZU+W1CZpe2rLfrI95OL5XyHby+61Of3tfY/eAhwP3NFbISJeLXxcgkPbQUTsAe4le28KbYiIXxe0fyzw+8A/FdX7OtkXzTvLWL8NIQf9yHYgIh5Kt3+NiC8BnwU+LenkVGcS8Fwf8z5Hthdf6J9T+RiyPfW+7Ozn
8aR+6pez/lIM2fIi4vtk3RvnkwXjLklfkfQ6AEnnAuvIwv2jZEH0jjT78f0s9iSyPf8byYK499ZD1gc+JdW7kmy0yGeAxyU9KWlhf22VNCa15V1pngayL/i7+mjL7hTcvc/zlaI29x5j6O+9HMieeO2B1Z28dhsofp9OIXsNist7Hw9me7Ah4FE3x55usr7q3yH7d3oHMKOPerVpeqGVZCH1LPAF4EN9zHdaP4/7G75YzvpLsaOPNvQu78flLiwi2oA2SacC7yfru94NLCfrK34e+NNIfQWS3jjAIl8k62K4Bvh/fUx/Jq33RbIuso9LeitZ98Vtkh6JiM19zHcWMBO4OCLu7i2UNKHEp1ro2fT3NA5/D/p6XfsyUa8dRXMar90Gisfk7yL70iteT236O5jtwYaA9+iPPfXpb+/IhQeAcySd2VtB0ulke4ZdBWVzgSXAfweagEZJf9LH8v+46PH7yT7g2/qoW+r6i/c4j+QB4MKiESznkvXrdvU300Ai4vmIuJmsf316Kp4A7O8N+eTDAyxnL1k/9JsL/tsqvD3TxzyPAJ8i+7z9Xj+L7g30nt6C9KUzu4SnV+xRsgOx8wuWNabwcQkObQeSJpIdHH7wSDNExEGyL+MPFk26jOyg/g/LWD9k200p24wNwHv0I1uNpN6uhOPI+n//ErgjInr32m4FlgF3SfoM2UGvFWR7VzfDoQ/qGuDrEfGNVHYz8H8k/UtEPF+wzhlp2jfJujyagKsKuwqKDLj+iHhF0lPAZZI2koXQIwVdDoWuJ/syukfSKrIDgSvJwuubA79kvyHpWrLugs7UnplkI2CWpyr3An8h6QvAd8m+nD5SwqKXkp3T8CrwDbL/EM4ALgGaI+IJSV1k/dwbyfZ8e08M6i8sHyP7Mv07SX8FnAhcS3YgvCwR8YKk1cC1kg4Am9L6J5a4iJeBlrTdPEN2UPs44IslzLuC7L37e+B2suMFfw3cko7dlOMxsi/9C4EXgKci4oUyl2HgUTcj9cZrR928AjwJrAJOLKr7JrL+4N3AHuBOYFrB9JvJ9soLh1tOJBsW+c30eGpaz4fJhmLuJuvWuBZQUbt2lbP+VOcCsiGBv07rmXqE5z6T7HjCPrKukq8BtUV1tjLwqJtLgQ3pefyabAjo8qLns5Tsv6O9ZEMFp/Ha0R+vWRfZSKC7ycaP7yU7IHo98Po0/XNkX06703PoAN49QHvPJfsieDm91x8j+yJ96EivfyovbvN4suMILwH/AdwA/E9KG3WzC3g32QHlHrJhuOcX1ev39ScbUvoo2Ta7jWyYbk3B9N5tbaBRN29K78lLlDhqy7e+b0ovqI1yyk5gegp4X0TcWd3WWLWkk5eujIhTqt0WGzruozczy7mSgj6d7vxoOhX6oVR2cjpd+8n096SC+ldL2iLp8dS/ZmZmVVJS142yy4jOiohdBWX/G/hlRKyUtBw4KSKWSZpO1sd7HtmJNt8HfjeyI/JmZjbMjqbrZj7Z9SxIfxcUlN8eET0R8RSwhdeeUWdmZsOk1OGVAXxf0kHg5ohYTTYKovcEimf5zUkRp5ONM+61LZUdRtkPaCwGmDBhwjlTpkwprmJl2Lp1K6eddhonnHACr776KmPGjGHfvn3s3LmTqVOnVrt5ZgCHtk0bGk888cSuiDh1oHqlBv2ciNgu6TTgXkmPFU6MiFCZv1yTvixWA8yaNSseeuihcma3Iu3t7TQ3N3PTTTdx8OBBxo4dS1NTE7feeiuNjY3Vbp4ZAJ2dncydO7fazcgNSU+XUq+koI+I7envTknfJuuKeU7SpIjYIWkSv7mOxnZ+c70PyC60VPZJH1ae3jBfsmQJ3d3d1NXV0dLS4pA3s4H76CW9rvd09HQxqAvIzvZbByxK1RbxmyvjrQMWph8+OJPsBJQjnjptQ6OxsZGNGzeyYcMGNm7c6JA3M6C0Pfpa4NuSeut/LSLulvQjsku1NpH9oMVlABGxSdJasjMFD5D9aIVH3JiZ
VcmAQR/Zdcbf1kf5C8C8184BEdFCdtqzmZlVmQ9/m5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0OdLe3k59fT3z5s2jvr6e9vb2ajfJzEaAUq9HbyNc7/XoW1tbD7sePeCrWJqNct6jz4mWlhZaW1tpaGigpqaGhoYGWltbaWnxteXMRjsHfU50d3czZ86cw8rmzJlDd3d3lVpkZiOFgz4n6urq6OrqOqysq6uLurq6KrXIzEYKB31ONDc309TUREdHBwcOHKCjo4Ompiaam5ur3TQzqzIfjM0J/2asmfXHQZ8jjY2NNDY20tnZydy5c6vdHDMbIdx1Y2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzj88cgyTVPY8EVGBlpjZSFbyHr2ksZJ+KunO9PhkSfdKejL9Pamg7tWStkh6XNKFlWi4ZaHd1+2Ny+7sd5qZjT7ldN1cBXQXPF4ObIiIacCG9BhJ04GFwAzgIuBGSWOHprlmZlaukoJe0mTgEuCrBcXzgbZ0vw1YUFB+e0T0RMRTwBbgvKFprpmZlavUPvovAEuBEwvKaiNiR7r/LFCb7p8O3F9Qb1sqO4ykxcBigNraWjo7O0tvtQ3Ir6eNRHv27PG2WQUDBr2kS4GdEfFjSXP7qhMRIamsDuCIWA2sBpg1a1bMndvnom0w7l6PX08biTo7O71tVkEpe/SzgT+S9IfA8cBvSfpH4DlJkyJih6RJwM5UfzswpWD+yanMzMyqYMA++oi4OiImR8RUsoOs/xwRHwHWAYtStUXAHen+OmChpPGSzgSmAQ8OecvNzKwkRzOOfiWwVlIT8DRwGUBEbJK0FtgMHACuiIiDR91SMzMblLKCPiI6gc50/wVgXj/1WoCWo2ybmZkNAV8Cwcws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOTdg0Es6XtKDkn4maZOka1P5yZLulfRk+ntSwTxXS9oi6XFJF1byCZiZ2ZGVskffA7w3It4GnA1cJOkdwHJgQ0RMAzakx0iaDiwEZgAXATdKGluJxpuZ2cAGDPrI7EkPx6VbAPOBtlTeBixI9+cDt0dET0Q8BWwBzhvSVpuZWclqSqmU9sh/DJwFfCUiHpBUGxE7UpVngdp0/3Tg/oLZt6Wy4mUuBhYD1NbW0tnZOagnYH3z62kj0Z49e7xtVkFJQR8RB4GzJb0B+Lak+qLpISnKWXFErAZWA8yaNSvmzp1bzux2JHevx6+njUSdnZ3eNqugrFE3EfEi0EHW9/6cpEkA6e/OVG07MKVgtsmpzMzMqqCUUTenpj15JE0A/gB4DFgHLErVFgF3pPvrgIWSxks6E5gGPDjUDTczs9KU0nUzCWhL/fRjgLURcaekHwJrJTUBTwOXAUTEJklrgc3AAeCK1PVjZmZVMGDQR8QjwMw+yl8A5vUzTwvQctStMzOzo+YzY83Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JtZxbW3t1NfX8+8efOor6+nvb292k0aVQb8cXAzs6PR3t5Oc3Mzra2tHDx4kLFjx9LU1ARAY2NjlVs3OniP3swqqqWlhdbWVhoa
GqipqaGhoYHW1lZaWlqq3bRRw0FvZhXV3d3NnDlzDiubM2cO3d3dVWrR6OOgN7OKqquro6ur67Cyrq4u6urqqtSi0cdBb2YV1dzcTFNTEx0dHRw4cICOjg6amppobm6udtNGDR+MNbOK6j3gumTJErq7u6mrq6OlpcUHYoeRg97MKq6xsZHGxkY6OzuZO3dutZsz6rjrxswqzuPoq8t79GZWUR5HX33eozezivI4+upz0JtZRXkcffU56M2sojyOvvoc9GZWUR5HX30+GGtmFeVx9NXnoDezivM4+uoasOtG0hRJHZI2S9ok6apUfrKkeyU9mf6eVDDP1ZK2SHpc0oWVfAJmZnZkpfTRHwA+GRHTgXcAV0iaDiwHNkTENGBDekyathCYAVwE3ChpbCUab2ZmAxsw6CNiR0T8JN3fDXQDpwPzgbZUrQ1YkO7PB26PiJ6IeArYApw31A03M7PSlNVHL2kqMBN4AKiNiB1p0rNAbbp/OnB/wWzbUlnxshYDiwFqa2vp7Owspyk2AL+eNhLt2bPH22YVlBz0kiYC3wT+IiJ+JenQtIgISVHOiiNiNbAaYNasWeEDNEPo7vU+4GUjkg/GVkdJ4+gljSML+dsi4lup+DlJk9L0ScDOVL4dmFIw++RUZmZmVVDKqBsBrUB3RFxfMGkdsCjdXwTcUVC+UNJ4SWcC04AHh67JZmZWjlK6bmYDHwUelfRwKvs0sBJYK6kJeBq4DCAiNklaC2wmG7FzRUQcHPKWm5lZSQYM+ojoAtTP5Hn9zNMC+NJ0ZmYjgK91Y2aWcw56M7Occ9CbmeWcg97MLOd89coR7m3Xfo+XXt5f9nxTl68vq/7rJ4zjZysuKHs9ZjbyOehHuJde3s/WlZeUNc9gzj4s94vBzI4d7roxs4prb2+nvr6eefPmUV9fT3t7e7WbNKp4j97MKqq9vZ3m5mZaW1s5ePAgY8eOpampCcC/MjVMvEdvZhXV0tJCa2srDQ0N1NTU0NDQQGtrKy0tPqdyuDjozayiuru7mTNnzmFlc+bMobu7u0otGn0c9GZWUXV1dXR1dR1W1tXVRV1dXZVaNPo46M2sopqbm2lqaqKjo4MDBw7Q0dFBU1MTzc3N1W7aqOGDsWZWUb0HXJcsWUJ3dzd1dXW0tLT4QOwwctCbWcU1NjbS2NjoX5iqEnfdmJnlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozqzhfpri6fMKUmVWUL1Ncfd6jN7OK8mWKq89Bb2YV5csUV5+D3swqypcprj4HvZlVlC9TXH0+GGtmFeXLFFefg97MKs6XKa4ud92YWcV5HH11eY/ezCrK4+irz3v0ZlZRHkdffQ56M6soj6OvPnfdmFlF1dXVce211/Kd73zn0KibBQsWeBz9MBpwj17SGkk7JW0sKDtZ0r2Snkx/TyqYdrWkLZIel3RhpRpuZseGhoYGVq1axeWXX8769eu5/PLLWbVqFQ0NDdVu2qhRStfNrcBFRWXLgQ0RMQ3YkB4jaTqwEJiR5rlR0tgha62ZHXM6OjpYtmwZa9as4ZJLLmHNmjUsW7aMjo6Oajdt1Bgw6CPiX4BfFhXPB9rS/TZgQUH57RHRExFPAVuA84aorWZ2DOru7mbFihVs3LiRDRs2sHHjRlasWOE++mE02D762ojYke4/C9Sm+6cD9xfU25bKXkPSYmAxQG1tLZ2dnYNsSv6V+9rs2bNnUK+n3wOrhDPOOIMvf/nLzJw589C2+dOf/pQzzjjD29wwOeqDsRERkmIQ860GVgPMmjUrfLZcP+5eX/aZhIM6+3AQ6zErxXXXXXdoHP3xxx9PRHDDDTdw3XXXeZsbJoMN+uckTYqIHZImATtT+XZgSkG9yanMzEYpX+um+gY7jn4dsCjdXwTcUVC+UNJ4SWcC04AHj66JZnasa2xsPKyP3iE/vAbco5fUDswFTpG0DVgBrATWSmoCngYuA4iITZLWApuBA8AV
EXGwQm03M7MSDBj0EdHfV++8fuq3AD632cxshPAlEMys4nz1yupy0JtZRbW3t3PVVVexd+9eAPbu3ctVV13lsB9GDnozq6ilS5dSU1PDmjVruOeee1izZg01NTUsXbq02k0bNRz0ZlZR27Zto62t7bDLFLe1tbFt27ZqN23UcNCbmeWcL1NsZhU1efJkFixYwP79+9m/fz/jxo1j3LhxTJ48udpNGzW8R29mFTV9+nT27dvHxIkTkcTEiRPZt28f06dPr3bTRg0HvZlV1H333cfs2bPZt28fEcG+ffuYPXs29913X7WbNmq468bMKqqnp4ft27dz1113Hfpx8Msvv5yenp5qN23UcNCPcCfWLectbcvLn7Ft4CqHrwfgkvLXYzYASVx88cU0NDQcurLqxRdfzE033VTtpo0aDvoRbnf3SrauLC+AB3OZ4qnL15dV36xUEcEtt9zCWWedxfTp07n++uu55ZZbiCj76uY2SA56M6uoGTNmMG3aND796U/T09PD+PHjufTSS3nyySer3bRRw0FvZhXV3NxMc3PzYX30TU1NtLT42ofDxUFvZhXlHx6pPge9mVVcY2MjjY2Ng/uZSztqDnozG3KSBjWfD9BWhk+YMrMhFxF93t647M5+pznkK8dBb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnC9qZmaD9rZrv8dLL+8va55yf83s9RPG8bMVF5Q1jx3OQX8MGNTP/N1d/ofJrFwvvby/rJ+69M9cVoeDfoQr9/diIftgDGY+M8sn99GbmeWcg97MLOcc9GZmOec+ejMbtBPrlvOWtuXlzdRW7joAfMzpaFQs6CVdBHwRGAt8NSJWVmpdZlYdu7tXetTNMaAiXTeSxgJfAS4GpgONkqZXYl1mZnZkldqjPw/YEhE/B5B0OzAf2Fyh9Y1Kkvqftqrvcv8Asw21sve4fY7HsKtU0J8O/KLg8Tbg7YUVJC0GFqeHeyQ9XqG2jEanALv6mnCkLwezYdDvtnkkumboG5ITbyylUtUOxkbEamB1tdafZ5IeiohZ1W6HWTFvm9VRqeGV24EpBY8npzIzMxtmlQr6HwHTJJ0p6ThgIbCuQusyM7MjqEjXTUQckHQlcA/Z8Mo1EbGpEuuyPrlLzEYqb5tVII/CMDPLN18Cwcws5xz0ZmY556A/BkhqlrRJ0iOSHpb09iPUvVXSB4azfTZ6STqYtsmNkv5J0gllzv9BSd2SOiTNkvSlVD5X0rsq0+rRxxc1G+EkvRO4FPj9iOiRdApwXJWbZdbr5Yg4G0DSbcCfA9f3TlR2hp4i4tV+5m8C/iwiutLjh9LfucAe4N8q0ejRxnv0I98kYFdE9ABExK6IeEbSZyT9KO1JrVYfp7xKOkfSfZJ+LOkeSZNS+cclbU7/Idw+zM/H8usHwFmSpkp6XNI/ABuBKZIaJT2attdVAJI+A8wBWiV9Lu3F3ylpKtkXxifSfwvvrtLzyQ0H/cj3PbIPyhOSbpT0nlT+5Yg4NyLqgQlke/2HSBoH3AB8ICLOAdYALWnycmBmRLyV7ANldlQk1ZBdxPDRVDQNuDEiZgD7gVXAe4GzgXMlLYiIz5LtwX84Ij7Vu6yI2ArcBHw+Is6OiB8M3zPJJwf9CBcRe4BzyK4L9DzwdUkfAxokPSDpUbIP0IyiWd8M1AP3SnoY+EuyM5QBHgFuk/QR4EDln4Xl2IS0fT0E/DvQmsqfjoj70/1zgc6IeD4iDgC3AecPf1NHL/fRHwMi4iDQCXSmYP9vwFuBWRHxC0nXAMcXzSZgU0S8s49FXkL2QXsf0CzpLekDaFauQ330vVIv4t7qNMf64j36EU7SmyVNKyg6G+i90ucuSROBvkbZPA6cmg7mImmcpBmSxgBTIqIDWAa8HphYuWdgxoPAeySdkn6rohG4b4B5dgMnVrxlo4T36Ee+icANkt5A1s2yhawb50Wy
A13Pkl1b6DAR8UoaZvklSa8ne6+/ADwB/GMqE/CliHhxWJ6JjUoRsUPScqCDbJtbHxF3DDDbd4FvSJoPLHE//dHxJRDMzHLOXTdmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5dz/B6aOVBnTrqJQAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25be8ff5668>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Draw the box plot, then style the Axes object that the call returns\n",
    "box_ax = df_subset.plot.box()\n",
    "box_ax.set_title(\"Boxplot of sales and profit\", fontsize=15)\n",
    "box_ax.set_ylim(0, 500)  # show only the 0-500 band of the y-axis\n",
    "box_ax.grid(True)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 4: A useful function – unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Kentucky', 'California', 'Florida', 'North Carolina',\n",
       "       'Washington', 'Texas', 'Wisconsin', 'Utah', 'Nebraska',\n",
       "       'Pennsylvania', 'Illinois', 'Minnesota', 'Michigan', 'Delaware',\n",
       "       'Indiana', 'New York', 'Arizona', 'Virginia', 'Tennessee',\n",
       "       'Alabama', 'South Carolina', 'Oregon', 'Colorado', 'Iowa', 'Ohio',\n",
       "       'Missouri', 'Oklahoma', 'New Mexico', 'Louisiana', 'Connecticut',\n",
       "       'New Jersey', 'Massachusetts', 'Georgia', 'Nevada', 'Rhode Island',\n",
       "       'Mississippi', 'Arkansas', 'Montana', 'New Hampshire', 'Maryland',\n",
       "       'District of Columbia', 'Kansas', 'Vermont', 'Maine',\n",
       "       'South Dakota', 'Idaho', 'North Dakota', 'Wyoming',\n",
       "       'West Virginia'], dtype=object)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values found in the 'State' column\n",
    "df.loc[:, 'State'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "49"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count of distinct 'State' values; missing values are not counted (the default)\n",
    "df.loc[:, 'State'].nunique(dropna=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['United States'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values found in the 'Country' column\n",
    "df.loc[:, 'Country'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the 'Country' column. Rebinding `df` instead of using inplace=True,\n",
    "# together with errors='ignore', keeps this cell safe to run more than once:\n",
    "# a second run no longer fails with a KeyError because the column is already gone.\n",
    "df = df.drop(columns='Country', errors='ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 5: Conditional Selection and Boolean Filtering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>California</td>\n",
       "      <td>14.6200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>22.3680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>48.8600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>7.2800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>18.5040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "2    Second Class  California   14.6200\n",
       "3  Standard Class     Florida  957.5775\n",
       "4  Standard Class     Florida   22.3680\n",
       "5  Standard Class  California   48.8600\n",
       "6  Standard Class  California    7.2800\n",
       "7  Standard Class  California  907.1520\n",
       "8  Standard Class  California   18.5040\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows labelled 0 through 9, restricted to three columns\n",
    "df_subset = df.loc[\n",
    "    list(range(10)),\n",
    "    ['Ship Mode', 'State', 'Sales'],\n",
    "]\n",
    "df_subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Ship Mode  State  Sales\n",
       "0       True   True   True\n",
       "1       True   True   True\n",
       "2       True   True  False\n",
       "3       True   True   True\n",
       "4       True   True  False\n",
       "5       True   True  False\n",
       "6       True   True  False\n",
       "7       True   True   True\n",
       "8       True   True  False\n",
       "9       True   True   True"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Compare every cell of the frame with 100, producing a frame of booleans.\n",
    "# NOTE(review): 'Ship Mode' and 'State' hold strings; the stored output shows True\n",
    "# for them, but other pandas versions may raise a TypeError here -- confirm.\n",
    "df_subset>100"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>California</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "2    Second Class  California       NaN\n",
       "3  Standard Class     Florida  957.5775\n",
       "4  Standard Class     Florida       NaN\n",
       "5  Standard Class  California       NaN\n",
       "6  Standard Class  California       NaN\n",
       "7  Standard Class  California  907.1520\n",
       "8  Standard Class  California       NaN\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Index the frame with a boolean frame of the same shape; in the stored output the\n",
    "# cells whose mask is False come back as NaN while all rows are kept.\n",
    "# NOTE(review): relies on comparing the string columns with a number -- verify on\n",
    "# the pandas version in use.\n",
    "df_subset[df_subset>100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "3  Standard Class     Florida  957.5775\n",
       "7  Standard Class  California  907.1520\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the rows whose 'Sales' value exceeds 100\n",
    "df_subset.loc[df_subset['Sales'].gt(100)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode     State     Sales\n",
       "0    Second Class  Kentucky  261.9600\n",
       "1    Second Class  Kentucky  731.9400\n",
       "3  Standard Class   Florida  957.5775"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows outside California whose sales exceed 100 (both conditions must hold)\n",
    "df_subset.loc[\n",
    "    df_subset['State'].ne('California')\n",
    "    & df_subset['Sales'].gt(100)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 6: Setting and re-setting index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "The DataFrame\n",
      "-------------------------\n",
      "   Age  Height  Weight\n",
      "A   22      66     140\n",
      "B   42      70     148\n",
      "C   30      62     125\n",
      "D   35      68     160\n",
      "E   25      62     152\n",
      "\n",
      "After resetting index\n",
      "-----------------------------------\n",
      "  index  Age  Height  Weight\n",
      "0     A   22      66     140\n",
      "1     B   42      70     148\n",
      "2     C   30      62     125\n",
      "3     D   35      68     160\n",
      "4     E   25      62     152\n",
      "\n",
      "After resetting index with 'drop' option TRUE\n",
      "---------------------------------------------\n",
      "   Age  Height  Weight\n",
      "0   22      66     140\n",
      "1   42      70     148\n",
      "2   30      62     125\n",
      "3   35      68     160\n",
      "4   25      62     152\n",
      "\n",
      "Adding a new column 'Profession'\n",
      "---------------------------------------------\n",
      "   Age  Height  Weight Profession\n",
      "A   22      66     140    Student\n",
      "B   42      70     148    Teacher\n",
      "C   30      62     125   Engineer\n",
      "D   35      68     160     Doctor\n",
      "E   25      62     152      Nurse\n",
      "\n",
      "Setting 'Profession' column as index\n",
      "---------------------------------------------\n",
      "            Age  Height  Weight\n",
      "Profession                     \n",
      "Student      22      66     140\n",
      "Teacher      42      70     148\n",
      "Engineer     30      62     125\n",
      "Doctor       35      68     160\n",
      "Nurse        25      62     152\n"
     ]
    }
   ],
   "source": [
    "# Raw measurements as a plain 2-D array; NumPy no longer recommends np.matrix\n",
    "matrix_data = np.array([\n",
    "    [22, 66, 140],\n",
    "    [42, 70, 148],\n",
    "    [30, 62, 125],\n",
    "    [35, 68, 160],\n",
    "    [25, 62, 152],\n",
    "])\n",
    "row_labels = ['A', 'B', 'C', 'D', 'E']\n",
    "column_headings = ['Age', 'Height', 'Weight']\n",
    "\n",
    "\n",
    "def _show_section(title, frame, rule_width):\n",
    "    \"\"\"Print a blank line, `title`, a dashed rule of `rule_width` characters, then `frame`.\"\"\"\n",
    "    print('\\n' + title + '\\n' + '-' * rule_width)\n",
    "    print(frame)\n",
    "\n",
    "\n",
    "df1 = pd.DataFrame(data=matrix_data, index=row_labels, columns=column_headings)\n",
    "_show_section(\"The DataFrame\", df1, 25)\n",
    "# reset_index() hands back a new frame; df1 itself keeps its letter labels\n",
    "_show_section(\"After resetting index\", df1.reset_index(), 35)\n",
    "_show_section(\"After resetting index with 'drop' option TRUE\", df1.reset_index(drop=True), 45)\n",
    "\n",
    "# One profession per row, listed in the same order as row_labels\n",
    "df1['Profession'] = ['Student', 'Teacher', 'Engineer', 'Doctor', 'Nurse']\n",
    "_show_section(\"Adding a new column 'Profession'\", df1, 45)\n",
    "_show_section(\"Setting 'Profession' column as index\", df1.set_index('Profession'), 45)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 7: GroupBy method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>261.9600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Second Class</td>\n",
       "      <td>California</td>\n",
       "      <td>14.6200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Florida</td>\n",
       "      <td>22.3680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>48.8600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>7.2800</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>907.1520</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>18.5040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Standard Class</td>\n",
       "      <td>California</td>\n",
       "      <td>114.9000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Ship Mode       State     Sales\n",
       "0    Second Class    Kentucky  261.9600\n",
       "1    Second Class    Kentucky  731.9400\n",
       "2    Second Class  California   14.6200\n",
       "3  Standard Class     Florida  957.5775\n",
       "4  Standard Class     Florida   22.3680\n",
       "5  Standard Class  California   48.8600\n",
       "6  Standard Class  California    7.2800\n",
       "7  Standard Class  California  907.1520\n",
       "8  Standard Class  California   18.5040\n",
       "9  Standard Class  California  114.9000"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rebuild the ten-row, three-column sample from the full data set\n",
    "df_subset = df.loc[\n",
    "    list(range(10)),\n",
    "    ['Ship Mode', 'State', 'Sales'],\n",
    "]\n",
    "df_subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the sample rows by the values of the 'State' column\n",
    "byState = df_subset.groupby(by='State')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x0000025BE8FF5E48>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Echo the object itself: the stored output is a DataFrameGroupBy repr, not a table\n",
    "byState"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Grouping by 'State' column and listing mean sales\n",
      "--------------------------------------------------\n",
      "                 Sales\n",
      "State                 \n",
      "California  185.219333\n",
      "Florida     489.972750\n",
      "Kentucky    496.950000\n"
     ]
    }
   ],
   "source": [
    "# Select 'Sales' before averaging: the groups also contain the text column\n",
    "# 'Ship Mode', and recent pandas releases raise a TypeError when asked to average\n",
    "# it instead of silently leaving it out of the result.\n",
    "print(\"\\nGrouping by 'State' column and listing mean sales\\n\", '-' * 50, sep='')\n",
    "print(byState[['Sales']].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Grouping by 'State' column and listing total sum of sales\n",
      "--------------------------------------------------\n",
      "                Sales\n",
      "State                \n",
      "California  1111.3160\n",
      "Florida      979.9455\n",
      "Kentucky     993.9000\n"
     ]
    }
   ],
   "source": [
    "# Select 'Sales' before summing: the groups also contain the text column\n",
    "# 'Ship Mode', which recent pandas releases no longer drop from the result\n",
    "# automatically.\n",
    "print(\"\\nGrouping by 'State' column and listing total sum of sales\\n\", '-' * 50, sep='')\n",
    "print(byState[['Sales']].sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "           Sales                                                              \n",
      "           count        mean         std   min     25%     50%    75%      max\n",
      "California   6.0  185.219333  355.889307  7.28  15.591  33.682  98.39  907.152\n"
     ]
    }
   ],
   "source": [
    "# Summary statistics of the California rows, printed as a single wide row\n",
    "print(\n",
    "    df_subset.groupby('State')\n",
    "    .describe()\n",
    "    .loc['California']\n",
    "    .to_frame()\n",
    "    .T\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">Sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ship Mode</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Second Class</th>\n",
       "      <td>3.0</td>\n",
       "      <td>336.173333</td>\n",
       "      <td>364.373037</td>\n",
       "      <td>14.62</td>\n",
       "      <td>138.290</td>\n",
       "      <td>261.96</td>\n",
       "      <td>496.950</td>\n",
       "      <td>731.9400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Standard Class</th>\n",
       "      <td>7.0</td>\n",
       "      <td>296.663071</td>\n",
       "      <td>435.947552</td>\n",
       "      <td>7.28</td>\n",
       "      <td>20.436</td>\n",
       "      <td>48.86</td>\n",
       "      <td>511.026</td>\n",
       "      <td>957.5775</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Sales                                                           \\\n",
       "               count        mean         std    min      25%     50%      75%   \n",
       "Ship Mode                                                                       \n",
       "Second Class     3.0  336.173333  364.373037  14.62  138.290  261.96  496.950   \n",
       "Standard Class   7.0  296.663071  435.947552   7.28   20.436   48.86  511.026   \n",
       "\n",
       "                          \n",
       "                     max  \n",
       "Ship Mode                 \n",
       "Second Class    731.9400  \n",
       "Standard Class  957.5775  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics per ship mode, for the two modes named below\n",
    "(\n",
    "    df_subset\n",
    "    .groupby('Ship Mode')\n",
    "    .describe()\n",
    "    .loc[['Second Class', 'Standard Class']]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"8\" valign=\"top\">Sales</th>\n",
       "      <th>count</th>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>185.219333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>355.889307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>7.280000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>15.591000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>33.682000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>98.390000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>907.152000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             California\n",
       "Sales count    6.000000\n",
       "      mean   185.219333\n",
       "      std    355.889307\n",
       "      min      7.280000\n",
       "      25%     15.591000\n",
       "      50%     33.682000\n",
       "      75%     98.390000\n",
       "      max    907.152000"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(byState.describe().loc['California'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the rows by State first, then by City within each State.\n",
    "byStateCity = df.groupby(by=['State', 'City'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>State</th>\n",
       "      <th>City</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"8\" valign=\"top\">Alabama</th>\n",
       "      <th>Auburn</th>\n",
       "      <td>6.0</td>\n",
       "      <td>294.471667</td>\n",
       "      <td>361.914543</td>\n",
       "      <td>3.760</td>\n",
       "      <td>8.8050</td>\n",
       "      <td>182.0300</td>\n",
       "      <td>456.40750</td>\n",
       "      <td>900.080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Decatur</th>\n",
       "      <td>13.0</td>\n",
       "      <td>259.601538</td>\n",
       "      <td>385.660903</td>\n",
       "      <td>14.940</td>\n",
       "      <td>23.9200</td>\n",
       "      <td>44.9500</td>\n",
       "      <td>239.92000</td>\n",
       "      <td>1215.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Florence</th>\n",
       "      <td>5.0</td>\n",
       "      <td>399.470000</td>\n",
       "      <td>796.488863</td>\n",
       "      <td>4.980</td>\n",
       "      <td>7.2700</td>\n",
       "      <td>12.4800</td>\n",
       "      <td>152.76000</td>\n",
       "      <td>1819.860</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hoover</th>\n",
       "      <td>4.0</td>\n",
       "      <td>131.462500</td>\n",
       "      <td>230.646923</td>\n",
       "      <td>7.160</td>\n",
       "      <td>13.3925</td>\n",
       "      <td>20.7250</td>\n",
       "      <td>138.79500</td>\n",
       "      <td>477.240</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Huntsville</th>\n",
       "      <td>10.0</td>\n",
       "      <td>248.437000</td>\n",
       "      <td>419.576667</td>\n",
       "      <td>3.620</td>\n",
       "      <td>26.8700</td>\n",
       "      <td>81.9200</td>\n",
       "      <td>171.80750</td>\n",
       "      <td>1319.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mobile</th>\n",
       "      <td>11.0</td>\n",
       "      <td>496.635455</td>\n",
       "      <td>914.087425</td>\n",
       "      <td>8.960</td>\n",
       "      <td>46.8600</td>\n",
       "      <td>70.9800</td>\n",
       "      <td>505.96500</td>\n",
       "      <td>3040.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Montgomery</th>\n",
       "      <td>10.0</td>\n",
       "      <td>372.273000</td>\n",
       "      <td>475.397645</td>\n",
       "      <td>10.160</td>\n",
       "      <td>21.7075</td>\n",
       "      <td>187.2150</td>\n",
       "      <td>499.05500</td>\n",
       "      <td>1394.950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tuscaloosa</th>\n",
       "      <td>2.0</td>\n",
       "      <td>87.850000</td>\n",
       "      <td>76.523096</td>\n",
       "      <td>33.740</td>\n",
       "      <td>60.7950</td>\n",
       "      <td>87.8500</td>\n",
       "      <td>114.90500</td>\n",
       "      <td>141.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"13\" valign=\"top\">Arizona</th>\n",
       "      <th>Avondale</th>\n",
       "      <td>6.0</td>\n",
       "      <td>157.801333</td>\n",
       "      <td>288.247527</td>\n",
       "      <td>14.576</td>\n",
       "      <td>18.1480</td>\n",
       "      <td>35.5960</td>\n",
       "      <td>88.67800</td>\n",
       "      <td>742.336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bullhead City</th>\n",
       "      <td>2.0</td>\n",
       "      <td>11.144000</td>\n",
       "      <td>4.559425</td>\n",
       "      <td>7.920</td>\n",
       "      <td>9.5320</td>\n",
       "      <td>11.1440</td>\n",
       "      <td>12.75600</td>\n",
       "      <td>14.368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Chandler</th>\n",
       "      <td>7.0</td>\n",
       "      <td>153.821000</td>\n",
       "      <td>305.283748</td>\n",
       "      <td>8.544</td>\n",
       "      <td>9.1200</td>\n",
       "      <td>49.7920</td>\n",
       "      <td>78.89750</td>\n",
       "      <td>842.376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Gilbert</th>\n",
       "      <td>15.0</td>\n",
       "      <td>278.158800</td>\n",
       "      <td>346.945589</td>\n",
       "      <td>5.904</td>\n",
       "      <td>36.1240</td>\n",
       "      <td>82.3680</td>\n",
       "      <td>375.80700</td>\n",
       "      <td>1113.024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Glendale</th>\n",
       "      <td>23.0</td>\n",
       "      <td>126.863696</td>\n",
       "      <td>225.003236</td>\n",
       "      <td>2.368</td>\n",
       "      <td>14.8760</td>\n",
       "      <td>42.9760</td>\n",
       "      <td>109.13200</td>\n",
       "      <td>933.536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mesa</th>\n",
       "      <td>28.0</td>\n",
       "      <td>144.205000</td>\n",
       "      <td>155.275947</td>\n",
       "      <td>4.368</td>\n",
       "      <td>31.7640</td>\n",
       "      <td>81.6515</td>\n",
       "      <td>202.90250</td>\n",
       "      <td>552.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Peoria</th>\n",
       "      <td>16.0</td>\n",
       "      <td>83.834500</td>\n",
       "      <td>88.768365</td>\n",
       "      <td>4.536</td>\n",
       "      <td>14.8920</td>\n",
       "      <td>68.1540</td>\n",
       "      <td>92.42600</td>\n",
       "      <td>280.792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Phoenix</th>\n",
       "      <td>63.0</td>\n",
       "      <td>174.607254</td>\n",
       "      <td>322.324198</td>\n",
       "      <td>1.408</td>\n",
       "      <td>12.8145</td>\n",
       "      <td>46.8720</td>\n",
       "      <td>193.96400</td>\n",
       "      <td>1879.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Scottsdale</th>\n",
       "      <td>12.0</td>\n",
       "      <td>122.192250</td>\n",
       "      <td>103.500825</td>\n",
       "      <td>4.401</td>\n",
       "      <td>30.1230</td>\n",
       "      <td>110.3040</td>\n",
       "      <td>186.89850</td>\n",
       "      <td>307.776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sierra Vista</th>\n",
       "      <td>3.0</td>\n",
       "      <td>25.357333</td>\n",
       "      <td>9.543341</td>\n",
       "      <td>14.368</td>\n",
       "      <td>22.2560</td>\n",
       "      <td>30.1440</td>\n",
       "      <td>30.85200</td>\n",
       "      <td>31.560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tempe</th>\n",
       "      <td>13.0</td>\n",
       "      <td>82.330923</td>\n",
       "      <td>119.755669</td>\n",
       "      <td>3.366</td>\n",
       "      <td>8.3760</td>\n",
       "      <td>12.7680</td>\n",
       "      <td>79.40000</td>\n",
       "      <td>318.400</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tucson</th>\n",
       "      <td>32.0</td>\n",
       "      <td>197.281750</td>\n",
       "      <td>242.004135</td>\n",
       "      <td>4.272</td>\n",
       "      <td>31.3200</td>\n",
       "      <td>95.9890</td>\n",
       "      <td>243.54000</td>\n",
       "      <td>1023.936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Yuma</th>\n",
       "      <td>4.0</td>\n",
       "      <td>210.216250</td>\n",
       "      <td>270.654379</td>\n",
       "      <td>10.496</td>\n",
       "      <td>36.2660</td>\n",
       "      <td>115.1920</td>\n",
       "      <td>289.14225</td>\n",
       "      <td>599.985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"9\" valign=\"top\">Arkansas</th>\n",
       "      <th>Conway</th>\n",
       "      <td>1.0</td>\n",
       "      <td>301.960000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>301.960</td>\n",
       "      <td>301.9600</td>\n",
       "      <td>301.9600</td>\n",
       "      <td>301.96000</td>\n",
       "      <td>301.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Fayetteville</th>\n",
       "      <td>14.0</td>\n",
       "      <td>267.343571</td>\n",
       "      <td>482.533092</td>\n",
       "      <td>6.240</td>\n",
       "      <td>19.5525</td>\n",
       "      <td>75.0850</td>\n",
       "      <td>297.83750</td>\n",
       "      <td>1793.980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hot Springs</th>\n",
       "      <td>4.0</td>\n",
       "      <td>61.457500</td>\n",
       "      <td>65.669769</td>\n",
       "      <td>25.920</td>\n",
       "      <td>28.9725</td>\n",
       "      <td>29.9950</td>\n",
       "      <td>62.48000</td>\n",
       "      <td>159.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Jonesboro</th>\n",
       "      <td>11.0</td>\n",
       "      <td>265.029091</td>\n",
       "      <td>366.221274</td>\n",
       "      <td>6.630</td>\n",
       "      <td>23.1500</td>\n",
       "      <td>59.9800</td>\n",
       "      <td>439.16000</td>\n",
       "      <td>1067.940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Little Rock</th>\n",
       "      <td>24.0</td>\n",
       "      <td>148.347917</td>\n",
       "      <td>206.445952</td>\n",
       "      <td>11.160</td>\n",
       "      <td>19.0700</td>\n",
       "      <td>60.9900</td>\n",
       "      <td>180.69000</td>\n",
       "      <td>881.930</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pine Bluff</th>\n",
       "      <td>2.0</td>\n",
       "      <td>106.455000</td>\n",
       "      <td>132.221897</td>\n",
       "      <td>12.960</td>\n",
       "      <td>59.7075</td>\n",
       "      <td>106.4550</td>\n",
       "      <td>153.20250</td>\n",
       "      <td>199.950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rogers</th>\n",
       "      <td>1.0</td>\n",
       "      <td>40.410000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>40.410</td>\n",
       "      <td>40.4100</td>\n",
       "      <td>40.4100</td>\n",
       "      <td>40.41000</td>\n",
       "      <td>40.410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Springdale</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4.300000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.300</td>\n",
       "      <td>4.3000</td>\n",
       "      <td>4.3000</td>\n",
       "      <td>4.30000</td>\n",
       "      <td>4.300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texarkana</th>\n",
       "      <td>2.0</td>\n",
       "      <td>327.120000</td>\n",
       "      <td>393.462497</td>\n",
       "      <td>48.900</td>\n",
       "      <td>188.0100</td>\n",
       "      <td>327.1200</td>\n",
       "      <td>466.23000</td>\n",
       "      <td>605.340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"15\" valign=\"top\">Washington</th>\n",
       "      <th>Bellingham</th>\n",
       "      <td>3.0</td>\n",
       "      <td>1263.413333</td>\n",
       "      <td>1327.859461</td>\n",
       "      <td>25.120</td>\n",
       "      <td>562.3100</td>\n",
       "      <td>1099.5000</td>\n",
       "      <td>1882.56000</td>\n",
       "      <td>2665.620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Covington</th>\n",
       "      <td>4.0</td>\n",
       "      <td>103.420000</td>\n",
       "      <td>93.849114</td>\n",
       "      <td>29.900</td>\n",
       "      <td>42.6500</td>\n",
       "      <td>73.4500</td>\n",
       "      <td>134.22000</td>\n",
       "      <td>236.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Des Moines</th>\n",
       "      <td>7.0</td>\n",
       "      <td>493.491143</td>\n",
       "      <td>665.676105</td>\n",
       "      <td>18.540</td>\n",
       "      <td>86.7940</td>\n",
       "      <td>215.9760</td>\n",
       "      <td>623.18200</td>\n",
       "      <td>1799.970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Edmonds</th>\n",
       "      <td>14.0</td>\n",
       "      <td>180.263714</td>\n",
       "      <td>344.045236</td>\n",
       "      <td>7.380</td>\n",
       "      <td>21.1450</td>\n",
       "      <td>54.0420</td>\n",
       "      <td>155.47250</td>\n",
       "      <td>1298.550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Everett</th>\n",
       "      <td>1.0</td>\n",
       "      <td>3.856000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.856</td>\n",
       "      <td>3.8560</td>\n",
       "      <td>3.8560</td>\n",
       "      <td>3.85600</td>\n",
       "      <td>3.856</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kent</th>\n",
       "      <td>8.0</td>\n",
       "      <td>168.906750</td>\n",
       "      <td>257.793525</td>\n",
       "      <td>19.440</td>\n",
       "      <td>38.4920</td>\n",
       "      <td>60.9880</td>\n",
       "      <td>168.21250</td>\n",
       "      <td>786.480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Longview</th>\n",
       "      <td>3.0</td>\n",
       "      <td>39.736667</td>\n",
       "      <td>31.681749</td>\n",
       "      <td>18.240</td>\n",
       "      <td>21.5450</td>\n",
       "      <td>24.8500</td>\n",
       "      <td>50.48500</td>\n",
       "      <td>76.120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Marysville</th>\n",
       "      <td>2.0</td>\n",
       "      <td>51.090000</td>\n",
       "      <td>59.778807</td>\n",
       "      <td>8.820</td>\n",
       "      <td>29.9550</td>\n",
       "      <td>51.0900</td>\n",
       "      <td>72.22500</td>\n",
       "      <td>93.360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Olympia</th>\n",
       "      <td>5.0</td>\n",
       "      <td>204.089600</td>\n",
       "      <td>236.364267</td>\n",
       "      <td>14.030</td>\n",
       "      <td>45.6800</td>\n",
       "      <td>155.2500</td>\n",
       "      <td>201.56800</td>\n",
       "      <td>603.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pasco</th>\n",
       "      <td>6.0</td>\n",
       "      <td>366.852000</td>\n",
       "      <td>356.325643</td>\n",
       "      <td>5.880</td>\n",
       "      <td>97.5540</td>\n",
       "      <td>352.3200</td>\n",
       "      <td>464.61000</td>\n",
       "      <td>975.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Redmond</th>\n",
       "      <td>3.0</td>\n",
       "      <td>18.410000</td>\n",
       "      <td>5.473783</td>\n",
       "      <td>12.320</td>\n",
       "      <td>16.1550</td>\n",
       "      <td>19.9900</td>\n",
       "      <td>21.45500</td>\n",
       "      <td>22.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Renton</th>\n",
       "      <td>3.0</td>\n",
       "      <td>414.210667</td>\n",
       "      <td>544.196636</td>\n",
       "      <td>51.840</td>\n",
       "      <td>101.3200</td>\n",
       "      <td>150.8000</td>\n",
       "      <td>595.39600</td>\n",
       "      <td>1039.992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Seattle</th>\n",
       "      <td>428.0</td>\n",
       "      <td>279.300799</td>\n",
       "      <td>823.990115</td>\n",
       "      <td>1.344</td>\n",
       "      <td>20.6905</td>\n",
       "      <td>65.6400</td>\n",
       "      <td>201.21000</td>\n",
       "      <td>13999.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Spokane</th>\n",
       "      <td>7.0</td>\n",
       "      <td>289.702571</td>\n",
       "      <td>300.735758</td>\n",
       "      <td>23.840</td>\n",
       "      <td>103.6200</td>\n",
       "      <td>149.7300</td>\n",
       "      <td>404.75400</td>\n",
       "      <td>837.600</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Vancouver</th>\n",
       "      <td>5.0</td>\n",
       "      <td>137.367200</td>\n",
       "      <td>157.470820</td>\n",
       "      <td>9.640</td>\n",
       "      <td>14.8000</td>\n",
       "      <td>44.0200</td>\n",
       "      <td>302.37600</td>\n",
       "      <td>316.000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West Virginia</th>\n",
       "      <th>Wheeling</th>\n",
       "      <td>4.0</td>\n",
       "      <td>302.456000</td>\n",
       "      <td>313.508627</td>\n",
       "      <td>6.240</td>\n",
       "      <td>63.3600</td>\n",
       "      <td>265.1200</td>\n",
       "      <td>504.21600</td>\n",
       "      <td>673.344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"13\" valign=\"top\">Wisconsin</th>\n",
       "      <th>Appleton</th>\n",
       "      <td>2.0</td>\n",
       "      <td>835.655000</td>\n",
       "      <td>1151.304190</td>\n",
       "      <td>21.560</td>\n",
       "      <td>428.6075</td>\n",
       "      <td>835.6550</td>\n",
       "      <td>1242.70250</td>\n",
       "      <td>1649.750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Eau Claire</th>\n",
       "      <td>6.0</td>\n",
       "      <td>274.275000</td>\n",
       "      <td>237.093940</td>\n",
       "      <td>32.560</td>\n",
       "      <td>117.6700</td>\n",
       "      <td>217.7700</td>\n",
       "      <td>364.37000</td>\n",
       "      <td>680.010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Franklin</th>\n",
       "      <td>9.0</td>\n",
       "      <td>530.472222</td>\n",
       "      <td>715.606149</td>\n",
       "      <td>3.600</td>\n",
       "      <td>106.0500</td>\n",
       "      <td>171.5500</td>\n",
       "      <td>392.94000</td>\n",
       "      <td>1951.840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Green Bay</th>\n",
       "      <td>4.0</td>\n",
       "      <td>131.830000</td>\n",
       "      <td>224.726888</td>\n",
       "      <td>16.740</td>\n",
       "      <td>18.4050</td>\n",
       "      <td>20.8400</td>\n",
       "      <td>134.26500</td>\n",
       "      <td>468.900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kenosha</th>\n",
       "      <td>9.0</td>\n",
       "      <td>434.081111</td>\n",
       "      <td>301.753460</td>\n",
       "      <td>14.980</td>\n",
       "      <td>139.9500</td>\n",
       "      <td>399.9500</td>\n",
       "      <td>699.98000</td>\n",
       "      <td>860.930</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>La Crosse</th>\n",
       "      <td>5.0</td>\n",
       "      <td>166.082000</td>\n",
       "      <td>200.481230</td>\n",
       "      <td>3.040</td>\n",
       "      <td>56.8200</td>\n",
       "      <td>68.6400</td>\n",
       "      <td>201.96000</td>\n",
       "      <td>499.950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Madison</th>\n",
       "      <td>10.0</td>\n",
       "      <td>534.679000</td>\n",
       "      <td>875.980874</td>\n",
       "      <td>1.810</td>\n",
       "      <td>32.3000</td>\n",
       "      <td>119.5850</td>\n",
       "      <td>614.39000</td>\n",
       "      <td>2807.840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Milwaukee</th>\n",
       "      <td>45.0</td>\n",
       "      <td>253.560444</td>\n",
       "      <td>376.458437</td>\n",
       "      <td>5.820</td>\n",
       "      <td>29.3400</td>\n",
       "      <td>92.5200</td>\n",
       "      <td>272.40000</td>\n",
       "      <td>1526.560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sheboygan</th>\n",
       "      <td>4.0</td>\n",
       "      <td>19.935000</td>\n",
       "      <td>15.126715</td>\n",
       "      <td>1.980</td>\n",
       "      <td>11.7225</td>\n",
       "      <td>20.0850</td>\n",
       "      <td>28.29750</td>\n",
       "      <td>37.590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Superior</th>\n",
       "      <td>9.0</td>\n",
       "      <td>144.414444</td>\n",
       "      <td>213.394065</td>\n",
       "      <td>5.560</td>\n",
       "      <td>17.1200</td>\n",
       "      <td>47.4000</td>\n",
       "      <td>125.99000</td>\n",
       "      <td>629.100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Waukesha</th>\n",
       "      <td>1.0</td>\n",
       "      <td>54.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>54.500</td>\n",
       "      <td>54.5000</td>\n",
       "      <td>54.5000</td>\n",
       "      <td>54.50000</td>\n",
       "      <td>54.500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wausau</th>\n",
       "      <td>4.0</td>\n",
       "      <td>79.370000</td>\n",
       "      <td>111.450605</td>\n",
       "      <td>12.390</td>\n",
       "      <td>20.0325</td>\n",
       "      <td>29.6050</td>\n",
       "      <td>88.94250</td>\n",
       "      <td>245.880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>West Allis</th>\n",
       "      <td>2.0</td>\n",
       "      <td>125.240000</td>\n",
       "      <td>165.067007</td>\n",
       "      <td>8.520</td>\n",
       "      <td>66.8800</td>\n",
       "      <td>125.2400</td>\n",
       "      <td>183.60000</td>\n",
       "      <td>241.960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wyoming</th>\n",
       "      <th>Cheyenne</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1603.136000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1603.136</td>\n",
       "      <td>1603.1360</td>\n",
       "      <td>1603.1360</td>\n",
       "      <td>1603.13600</td>\n",
       "      <td>1603.136</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>604 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             count         mean          std       min  \\\n",
       "State         City                                                       \n",
       "Alabama       Auburn           6.0   294.471667   361.914543     3.760   \n",
       "              Decatur         13.0   259.601538   385.660903    14.940   \n",
       "              Florence         5.0   399.470000   796.488863     4.980   \n",
       "              Hoover           4.0   131.462500   230.646923     7.160   \n",
       "              Huntsville      10.0   248.437000   419.576667     3.620   \n",
       "              Mobile          11.0   496.635455   914.087425     8.960   \n",
       "              Montgomery      10.0   372.273000   475.397645    10.160   \n",
       "              Tuscaloosa       2.0    87.850000    76.523096    33.740   \n",
       "Arizona       Avondale         6.0   157.801333   288.247527    14.576   \n",
       "              Bullhead City    2.0    11.144000     4.559425     7.920   \n",
       "              Chandler         7.0   153.821000   305.283748     8.544   \n",
       "              Gilbert         15.0   278.158800   346.945589     5.904   \n",
       "              Glendale        23.0   126.863696   225.003236     2.368   \n",
       "              Mesa            28.0   144.205000   155.275947     4.368   \n",
       "              Peoria          16.0    83.834500    88.768365     4.536   \n",
       "              Phoenix         63.0   174.607254   322.324198     1.408   \n",
       "              Scottsdale      12.0   122.192250   103.500825     4.401   \n",
       "              Sierra Vista     3.0    25.357333     9.543341    14.368   \n",
       "              Tempe           13.0    82.330923   119.755669     3.366   \n",
       "              Tucson          32.0   197.281750   242.004135     4.272   \n",
       "              Yuma             4.0   210.216250   270.654379    10.496   \n",
       "Arkansas      Conway           1.0   301.960000          NaN   301.960   \n",
       "              Fayetteville    14.0   267.343571   482.533092     6.240   \n",
       "              Hot Springs      4.0    61.457500    65.669769    25.920   \n",
       "              Jonesboro       11.0   265.029091   366.221274     6.630   \n",
       "              Little Rock     24.0   148.347917   206.445952    11.160   \n",
       "              Pine Bluff       2.0   106.455000   132.221897    12.960   \n",
       "              Rogers           1.0    40.410000          NaN    40.410   \n",
       "              Springdale       1.0     4.300000          NaN     4.300   \n",
       "              Texarkana        2.0   327.120000   393.462497    48.900   \n",
       "...                            ...          ...          ...       ...   \n",
       "Washington    Bellingham       3.0  1263.413333  1327.859461    25.120   \n",
       "              Covington        4.0   103.420000    93.849114    29.900   \n",
       "              Des Moines       7.0   493.491143   665.676105    18.540   \n",
       "              Edmonds         14.0   180.263714   344.045236     7.380   \n",
       "              Everett          1.0     3.856000          NaN     3.856   \n",
       "              Kent             8.0   168.906750   257.793525    19.440   \n",
       "              Longview         3.0    39.736667    31.681749    18.240   \n",
       "              Marysville       2.0    51.090000    59.778807     8.820   \n",
       "              Olympia          5.0   204.089600   236.364267    14.030   \n",
       "              Pasco            6.0   366.852000   356.325643     5.880   \n",
       "              Redmond          3.0    18.410000     5.473783    12.320   \n",
       "              Renton           3.0   414.210667   544.196636    51.840   \n",
       "              Seattle        428.0   279.300799   823.990115     1.344   \n",
       "              Spokane          7.0   289.702571   300.735758    23.840   \n",
       "              Vancouver        5.0   137.367200   157.470820     9.640   \n",
       "West Virginia Wheeling         4.0   302.456000   313.508627     6.240   \n",
       "Wisconsin     Appleton         2.0   835.655000  1151.304190    21.560   \n",
       "              Eau Claire       6.0   274.275000   237.093940    32.560   \n",
       "              Franklin         9.0   530.472222   715.606149     3.600   \n",
       "              Green Bay        4.0   131.830000   224.726888    16.740   \n",
       "              Kenosha          9.0   434.081111   301.753460    14.980   \n",
       "              La Crosse        5.0   166.082000   200.481230     3.040   \n",
       "              Madison         10.0   534.679000   875.980874     1.810   \n",
       "              Milwaukee       45.0   253.560444   376.458437     5.820   \n",
       "              Sheboygan        4.0    19.935000    15.126715     1.980   \n",
       "              Superior         9.0   144.414444   213.394065     5.560   \n",
       "              Waukesha         1.0    54.500000          NaN    54.500   \n",
       "              Wausau           4.0    79.370000   111.450605    12.390   \n",
       "              West Allis       2.0   125.240000   165.067007     8.520   \n",
       "Wyoming       Cheyenne         1.0  1603.136000          NaN  1603.136   \n",
       "\n",
       "                                   25%        50%         75%        max  \n",
       "State         City                                                        \n",
       "Alabama       Auburn            8.8050   182.0300   456.40750    900.080  \n",
       "              Decatur          23.9200    44.9500   239.92000   1215.920  \n",
       "              Florence          7.2700    12.4800   152.76000   1819.860  \n",
       "              Hoover           13.3925    20.7250   138.79500    477.240  \n",
       "              Huntsville       26.8700    81.9200   171.80750   1319.960  \n",
       "              Mobile           46.8600    70.9800   505.96500   3040.000  \n",
       "              Montgomery       21.7075   187.2150   499.05500   1394.950  \n",
       "              Tuscaloosa       60.7950    87.8500   114.90500    141.960  \n",
       "Arizona       Avondale         18.1480    35.5960    88.67800    742.336  \n",
       "              Bullhead City     9.5320    11.1440    12.75600     14.368  \n",
       "              Chandler          9.1200    49.7920    78.89750    842.376  \n",
       "              Gilbert          36.1240    82.3680   375.80700   1113.024  \n",
       "              Glendale         14.8760    42.9760   109.13200    933.536  \n",
       "              Mesa             31.7640    81.6515   202.90250    552.000  \n",
       "              Peoria           14.8920    68.1540    92.42600    280.792  \n",
       "              Phoenix          12.8145    46.8720   193.96400   1879.960  \n",
       "              Scottsdale       30.1230   110.3040   186.89850    307.776  \n",
       "              Sierra Vista     22.2560    30.1440    30.85200     31.560  \n",
       "              Tempe             8.3760    12.7680    79.40000    318.400  \n",
       "              Tucson           31.3200    95.9890   243.54000   1023.936  \n",
       "              Yuma             36.2660   115.1920   289.14225    599.985  \n",
       "Arkansas      Conway          301.9600   301.9600   301.96000    301.960  \n",
       "              Fayetteville     19.5525    75.0850   297.83750   1793.980  \n",
       "              Hot Springs      28.9725    29.9950    62.48000    159.920  \n",
       "              Jonesboro        23.1500    59.9800   439.16000   1067.940  \n",
       "              Little Rock      19.0700    60.9900   180.69000    881.930  \n",
       "              Pine Bluff       59.7075   106.4550   153.20250    199.950  \n",
       "              Rogers           40.4100    40.4100    40.41000     40.410  \n",
       "              Springdale        4.3000     4.3000     4.30000      4.300  \n",
       "              Texarkana       188.0100   327.1200   466.23000    605.340  \n",
       "...                                ...        ...         ...        ...  \n",
       "Washington    Bellingham      562.3100  1099.5000  1882.56000   2665.620  \n",
       "              Covington        42.6500    73.4500   134.22000    236.880  \n",
       "              Des Moines       86.7940   215.9760   623.18200   1799.970  \n",
       "              Edmonds          21.1450    54.0420   155.47250   1298.550  \n",
       "              Everett           3.8560     3.8560     3.85600      3.856  \n",
       "              Kent             38.4920    60.9880   168.21250    786.480  \n",
       "              Longview         21.5450    24.8500    50.48500     76.120  \n",
       "              Marysville       29.9550    51.0900    72.22500     93.360  \n",
       "              Olympia          45.6800   155.2500   201.56800    603.920  \n",
       "              Pasco            97.5540   352.3200   464.61000    975.920  \n",
       "              Redmond          16.1550    19.9900    21.45500     22.920  \n",
       "              Renton          101.3200   150.8000   595.39600   1039.992  \n",
       "              Seattle          20.6905    65.6400   201.21000  13999.960  \n",
       "              Spokane         103.6200   149.7300   404.75400    837.600  \n",
       "              Vancouver        14.8000    44.0200   302.37600    316.000  \n",
       "West Virginia Wheeling         63.3600   265.1200   504.21600    673.344  \n",
       "Wisconsin     Appleton        428.6075   835.6550  1242.70250   1649.750  \n",
       "              Eau Claire      117.6700   217.7700   364.37000    680.010  \n",
       "              Franklin        106.0500   171.5500   392.94000   1951.840  \n",
       "              Green Bay        18.4050    20.8400   134.26500    468.900  \n",
       "              Kenosha         139.9500   399.9500   699.98000    860.930  \n",
       "              La Crosse        56.8200    68.6400   201.96000    499.950  \n",
       "              Madison          32.3000   119.5850   614.39000   2807.840  \n",
       "              Milwaukee        29.3400    92.5200   272.40000   1526.560  \n",
       "              Sheboygan        11.7225    20.0850    28.29750     37.590  \n",
       "              Superior         17.1200    47.4000   125.99000    629.100  \n",
       "              Waukesha         54.5000    54.5000    54.50000     54.500  \n",
       "              Wausau           20.0325    29.6050    88.94250    245.880  \n",
       "              West Allis       66.8800   125.2400   183.60000    241.960  \n",
       "Wyoming       Cheyenne       1603.1360  1603.1360  1603.13600   1603.136  \n",
       "\n",
       "[604 rows x 8 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "byStateCity.describe()['Sales']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 8: Missing values in Pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the 'Missing' worksheet of the superstore workbook into its own frame.\n",
    "df_missing = pd.read_excel(\"Sample - Superstore.xls\", sheet_name=\"Missing\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1706.184</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>85.3092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "      <td>8.560</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Phones</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>16.0110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "      <td>22.720</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.3840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Binders</td>\n",
       "      <td>11.648</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.2224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>90.570</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77.880</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>13.980</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.1512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "      <td>146.730</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product     Sales  Quantity  Discount   Profit\n",
       "0      Brosina Hoffman          NaN  1706.184       9.0       0.2  85.3092\n",
       "1      Brosina Hoffman       Phones   911.424       4.0       0.2  68.3568\n",
       "2   Zuschuss Donatelli          Art     8.560       2.0       0.0   2.4824\n",
       "3   Zuschuss Donatelli       Phones       NaN       3.0       0.2  16.0110\n",
       "4   Zuschuss Donatelli      Binders    22.720       4.0       0.2   7.3840\n",
       "5        Eric Hoffmann      Binders    11.648       NaN       0.2   4.2224\n",
       "6        Eric Hoffmann  Accessories    90.570       3.0       0.0  11.7741\n",
       "7         Ruben Ausman          NaN    77.880       2.0       0.0      NaN\n",
       "8                  NaN  Accessories    13.980       2.0       0.0   6.1512\n",
       "9         Kunst Miller      Binders    25.824       6.0       0.2   9.3612\n",
       "10        Kunst Miller        Paper   146.730       3.0       0.0  68.9631"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_missing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Customer  Product  Sales  Quantity  Discount  Profit\n",
       "0      False     True  False     False     False   False\n",
       "1      False    False  False     False     False   False\n",
       "2      False    False  False     False     False   False\n",
       "3      False    False   True     False     False   False\n",
       "4      False    False  False     False     False   False\n",
       "5      False    False  False      True     False   False\n",
       "6      False    False  False     False     False   False\n",
       "7      False     True  False     False     False    True\n",
       "8       True    False  False     False     False   False\n",
       "9      False    False  False     False     False   False\n",
       "10     False    False  False     False     False   False"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Boolean mask with the same shape as the frame: True wherever an entry is missing.\n",
    "df_missing.isna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Customer has 1 missing value(s)\n",
      "Product has 2 missing value(s)\n",
      "Sales has 1 missing value(s)\n",
      "Quantity has 1 missing value(s)\n",
      "Discount has NO missing value!\n",
      "Profit has 1 missing value(s)\n"
     ]
    }
   ],
   "source": [
    "# Count the missing entries of every column in one pass, then report column by column.\n",
    "for col_name, n_missing in df_missing.isnull().sum().items():\n",
    "    if n_missing == 0:\n",
    "        print(\"{} has NO missing value!\".format(col_name))\n",
    "    else:\n",
    "        print(\"{} has {} missing value(s)\".format(col_name, n_missing))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 9: Filling missing values with `fillna()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>FILL</td>\n",
       "      <td>1706.18</td>\n",
       "      <td>9</td>\n",
       "      <td>0.2</td>\n",
       "      <td>85.3092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "      <td>8.56</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Phones</td>\n",
       "      <td>FILL</td>\n",
       "      <td>3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>16.011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "      <td>22.72</td>\n",
       "      <td>4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Binders</td>\n",
       "      <td>11.648</td>\n",
       "      <td>FILL</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.2224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>90.57</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>FILL</td>\n",
       "      <td>77.88</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>FILL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>FILL</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>13.98</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.1512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "      <td>146.73</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product    Sales Quantity  Discount   Profit\n",
       "0      Brosina Hoffman         FILL  1706.18        9       0.2  85.3092\n",
       "1      Brosina Hoffman       Phones  911.424        4       0.2  68.3568\n",
       "2   Zuschuss Donatelli          Art     8.56        2       0.0   2.4824\n",
       "3   Zuschuss Donatelli       Phones     FILL        3       0.2   16.011\n",
       "4   Zuschuss Donatelli      Binders    22.72        4       0.2    7.384\n",
       "5        Eric Hoffmann      Binders   11.648     FILL       0.2   4.2224\n",
       "6        Eric Hoffmann  Accessories    90.57        3       0.0  11.7741\n",
       "7         Ruben Ausman         FILL    77.88        2       0.0     FILL\n",
       "8                 FILL  Accessories    13.98        2       0.0   6.1512\n",
       "9         Kunst Miller      Binders   25.824        6       0.2   9.3612\n",
       "10        Kunst Miller        Paper   146.73        3       0.0  68.9631"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Substitute the placeholder string 'FILL' for every missing entry, in every column.\n",
    "# fillna returns a new frame; df_missing itself is left unchanged.\n",
    "df_missing.fillna(value='FILL')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>FILL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Phones</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Binders</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>FILL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>FILL</td>\n",
       "      <td>Accessories</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product\n",
       "0      Brosina Hoffman         FILL\n",
       "1      Brosina Hoffman       Phones\n",
       "2   Zuschuss Donatelli          Art\n",
       "3   Zuschuss Donatelli       Phones\n",
       "4   Zuschuss Donatelli      Binders\n",
       "5        Eric Hoffmann      Binders\n",
       "6        Eric Hoffmann  Accessories\n",
       "7         Ruben Ausman         FILL\n",
       "8                 FILL  Accessories\n",
       "9         Kunst Miller      Binders\n",
       "10        Kunst Miller        Paper"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Select only the Customer and Product columns, then fill their missing entries with 'FILL'.\n",
    "df_missing[['Customer', 'Product']].fillna(value='FILL')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     1706.184\n",
       "1      911.424\n",
       "2        8.560\n",
       "3        8.560\n",
       "4       22.720\n",
       "5       11.648\n",
       "6       90.570\n",
       "7       77.880\n",
       "8       13.980\n",
       "9       25.824\n",
       "10     146.730\n",
       "Name: Sales, dtype: float64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Forward fill: a missing Sales value takes the last valid value above it.\n",
    "# Series.ffill() is used because the `method=` argument of fillna() is deprecated in current pandas.\n",
    "df_missing['Sales'].ffill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     1706.184\n",
       "1      911.424\n",
       "2        8.560\n",
       "3       22.720\n",
       "4       22.720\n",
       "5       11.648\n",
       "6       90.570\n",
       "7       77.880\n",
       "8       13.980\n",
       "9       25.824\n",
       "10     146.730\n",
       "Name: Sales, dtype: float64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Backward fill: a missing Sales value takes the next valid value below it.\n",
    "# Series.bfill() is used because the `method=` argument of fillna() is deprecated in current pandas.\n",
    "df_missing['Sales'].bfill()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     1706.184\n",
       "1      911.424\n",
       "2        8.560\n",
       "3      301.552\n",
       "4       22.720\n",
       "5       11.648\n",
       "6       90.570\n",
       "7       77.880\n",
       "8       13.980\n",
       "9       25.824\n",
       "10     146.730\n",
       "Name: Sales, dtype: float64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean imputation: replace the missing Sales entry with the average of the Sales column.\n",
    "# The mean is taken on the Sales column alone; DataFrame.mean() over the whole frame would\n",
    "# also try to average the text columns (Customer, Product), which fails on pandas >= 2.0.\n",
    "df_missing['Sales'].fillna(df_missing['Sales'].mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 10: Dropping missing values with `dropna()`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Product</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Phones</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Art</td>\n",
       "      <td>8.560</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>Binders</td>\n",
       "      <td>22.720</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.3840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>90.570</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Binders</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>Paper</td>\n",
       "      <td>146.730</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer      Product    Sales  Quantity  Discount   Profit\n",
       "1      Brosina Hoffman       Phones  911.424       4.0       0.2  68.3568\n",
       "2   Zuschuss Donatelli          Art    8.560       2.0       0.0   2.4824\n",
       "4   Zuschuss Donatelli      Binders   22.720       4.0       0.2   7.3840\n",
       "6        Eric Hoffmann  Accessories   90.570       3.0       0.0  11.7741\n",
       "9         Kunst Miller      Binders   25.824       6.0       0.2   9.3612\n",
       "10        Kunst Miller        Paper  146.730       3.0       0.0  68.9631"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every row that contains at least one missing value.\n",
    "df_missing.dropna(axis='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Discount</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Discount\n",
       "0        0.2\n",
       "1        0.2\n",
       "2        0.0\n",
       "3        0.2\n",
       "4        0.2\n",
       "5        0.2\n",
       "6        0.0\n",
       "7        0.0\n",
       "8        0.0\n",
       "9        0.2\n",
       "10       0.0"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Drop every column that contains at least one missing value.\n",
    "df_missing.dropna(axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>1706.184</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>85.3092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>911.424</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>68.3568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>8.560</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.4824</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>16.0110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Zuschuss Donatelli</td>\n",
       "      <td>22.720</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>7.3840</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>11.648</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.2224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>90.570</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.7741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>77.880</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>13.980</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.1512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>25.824</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.2</td>\n",
       "      <td>9.3612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Kunst Miller</td>\n",
       "      <td>146.730</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>68.9631</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              Customer     Sales  Quantity  Discount   Profit\n",
       "0      Brosina Hoffman  1706.184       9.0       0.2  85.3092\n",
       "1      Brosina Hoffman   911.424       4.0       0.2  68.3568\n",
       "2   Zuschuss Donatelli     8.560       2.0       0.0   2.4824\n",
       "3   Zuschuss Donatelli       NaN       3.0       0.2  16.0110\n",
       "4   Zuschuss Donatelli    22.720       4.0       0.2   7.3840\n",
       "5        Eric Hoffmann    11.648       NaN       0.2   4.2224\n",
       "6        Eric Hoffmann    90.570       3.0       0.0  11.7741\n",
       "7         Ruben Ausman    77.880       2.0       0.0      NaN\n",
       "8                  NaN    13.980       2.0       0.0   6.1512\n",
       "9         Kunst Miller    25.824       6.0       0.2   9.3612\n",
       "10        Kunst Miller   146.730       3.0       0.0  68.9631"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the columns that have at least 10 non-missing values\n",
    "df_missing.dropna(axis='columns', thresh=10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 11: Outlier detection using simple statistical test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Random 50-row working sample restricted to the four columns used in this exercise;\n",
    "# .copy() makes df_sample an independent DataFrame.\n",
    "df_sample = df.loc[:, ['Customer Name', 'State', 'Sales', 'Profit']].sample(n=50).copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\users\\tirtha\\python\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:189: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n"
     ]
    }
   ],
   "source": [
    "# Inject two obviously wrong (negative) Sales values at row positions 5 and 15\n",
    "# so that the outlier check below has something to detect.\n",
    "# The write goes through a single .iloc call on df_sample itself: the chained form\n",
    "# df_sample['Sales'].iloc[i] = value assigns into an intermediate Series, which raises\n",
    "# SettingWithCopyWarning and is not guaranteed to modify df_sample.\n",
    "df_sample.iloc[[5, 15], df_sample.columns.get_loc('Sales')] = [-1000.0, -500.0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEPCAYAAAB7rQKTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X18HWWd9/HPt6kFWh6sioWbp4rKckrcFS2yatXEqlgfVlZBTGUX7uamt+5SWPUWgchS8JWXhRWQtbuWh7jACkcQ2SpiRag5aARci6tQOBV2pdQKKGALfYCWht/9x8wJ09OTJoFMJiTf9+s1r+Rc85trrjlnkt+Za66ZUURgZmaWlwlFN8DMzMY2JxozM8uVE42ZmeXKicbMzHLlRGNmZrlyojEzs1w50VhDkhZKisy0WdI9kuaPgnY9PsRlJqXLvXEY21GS9FNJm9L3Z/pw1d1gXaslfSWv+keKpJMljYrrKSR9RFJV0lZJq9OykHRyJma+pKMLa+QYMrHoBtio9iTw/vT3KcCHgUskbYyIa4pr1pBNAs4GVgO/GqY6/wl4OfBXwCbgkWGq13ImqQm4ClgGnETy+QG8FXgwEzofWAksHdEGjkFONLYz2yLizszr5ZLeBhwNvJQSTR4OBb4XEcuLboiBpN0i4ulBhu8L7AlcExE9tcK6fd2GkbvObKg2AC/LFkh6jaSlkp6StEHSjZJel5l/rKTnJM3OlE1P4zszr0PSXEn/ntbzR0lnD9Sggdafthng3zJdgdN3Ut8bJS1PuwvXSbpa0rRsO4HXAp9J66rspK52SfdJelrS45Juk3RYZv6itEtyo6S16br2GcQ2vyOta7OkJyRdJmmPzPyXS7pc0sOSnpG0RtJlA9T5QUm3pO/7U5LulPS+upiF6XYcns7fLOm/JL2jLm4XSYslrZf0J0kXUbff9NOGWv1vl/TLtO2/kjSrLm61pAsknSVpLfBUZt7H0/d0i6TfSeqUNDGddyLwuzT0u+nntzCd19d1ln6mbwZOyOwzJw7UfutHRHjytMMELAQeJznqnUjyDfB4YBvwt5m4XYDfAr8BjgM+RtLd8HvgFZm4MvBQWo+AbuDXwKR0/nQg0uUuAY4COoHngL+vb9dQ1g+0pnV/CfjLdNqln+3eG1gP3EFy5HY8sBa4m6QLbpd0+UeAq9PfZ/RT1zuBZ4EzgBaSbrYvA2/LxFyRrqMFOCZd733AhEzMauArmddvB7YA1wIfAP4m3d7rMzHfAFal78m70nVcOsBnfjLwGZLu0vcCFwK9wNvr3v/N6fvxv4E5wJ3AY8DkTNxFwDPA59KYG9L3MQax320m6cKaT9JdWyH5srBP3XvyCHBr+r5+NC1/X/pZX5lux2npe7Uk8/n+dRrzufTz2z+dF8DJ6e8zgCpwU2af2bvov8uX6lR4AzyNzin9g48G08V1cZ8iST4HZ8r2B7YCZ2TKXgE8DHQBp6R//H+RmT89rf9HdfVflv4TnZBp1+NDWT+we1r3iYPY7kUkiWbPTNmR6fJtmbLVZP7591PX/wPuGsJ73gTsl67rnf2tC/gp0F237LvT5ZrT1yuBBS/i859A8gXjZuAbDfaLd2fK3piWvT99/UrgaeALdfWtYnCJJoC5mbLdgT8Bi+rek0eAXeuWv7PBe3MaScKsJZTavvahuri+RJO+XgFcMVJ/c2N5cteZ7cyTwBHpNAs4laQrIdud9RbglxHx21pBRKwFfpYuUyv7E8mJ13kkJ9LPjYhfN1jnf9S9vgH4XyTJo5FBrX8I3kKS7Pq6YiLi5yT/2IZa36+AwyVdJOmdkibVB0iaI+l2SU+SJMy16axDGlUoaTLJSevrJE2sTUAPydHTmzPr/rykv5PUsK4Gde8v6UpJv0/b8izJEUL98ltJjjJq7kt/1j6jNwC7At+tBUTEc9nXg9C3H0TERuAWks8ma3lEPJNpfxPwJuDbdXHXkiS6tw5h/TaMnGhsZ7ZFxIp0+llE/DNwLnCmpFekMfsCf2iw7B9IjmKyfpyWTyA5Umnkj/283ref+KGsfzCG
rb6IuJWke+mdJP+YH5f0L5KmAEg6AvgeSXL5G5J/hH+ZLr5rP9VOJTny+VeSRFCbtpCcAzkgjTuZZLTUPwK/kfSApE/011ZJE9K2vC1dppXkC8ayBm3ZkCaO2nZurWtz7RxTf5/lQDbGjif2/8iO+0D95/Qqkvegvrz2+oXsDzYMPOrMhqpKcq7itSTdGY8AhzWIm5bOz1pE8k/yUeCrwNwGy726n9f9DR8eyvoH45EGbajVd9dQK4uIK4ErJe0NfJTk3MUG4HSScwWPAcdF2lcj6aABqlxP0sWzEPhBg/kPp+tdT9JFeYqkPyfpPrpa0t0RcV+D5V4HHA7MiYgf1gol7TbITc16NP35arb/DBq9r43srh1Hkb2aHfeB+mtyHidJuvXrmZb+fCH7gw0DH9HYUDWnP2sjd34OvFnSa2oBkvYj+WbckylrARYAnwbagTZJH2tQ/1/Xvf4oyT+YtQ1iB7v++m/cO/Nz4Ki6EVxHkPTr9/S30EAi4rGIuITk/MqMtHg34Nlakkl9coB6NpGch/izzNFmdnq4wTJ3A58n+Xs/tJ+qawllS60gTXpvH8Tm1buHZCDARzJ1Tci+HoS+/UDS7iSDE/5zZwtERC/Jl4Fj62Z9nGRQyR1DWD8k+81g9hkbgI9obGcmSqp15Uwi6f//IvDdiKh9a70C+AKwTNI/kpx0PZvk2+Ul0PeP4hvAtRFxfVp2CfB1ST+JiMcy6zwsnfcdki6nduDUbFdNnQHXHxFbJT0IfFzSSpJ/gndnunyyLiRJhjdLOo/kRPQikn+e3xn4LXuepHNIumsqaXsOJxkBdnoacgvwD5K+CtxIkhyPH0TVp5Fc0/QccD3JEdKBwAeBjoi4X1IPyXmOlSTf/GsXJvb3z3oVSTK/QNJZwB7AOSQDMYYkIp6QdClwjqRtwL3p+ncfZBVPA53pfvMwyaCKScDFg1j2bJLP7t+Ab5GcL/oScFl67m4oVpF86TgKeAJ4MCKeGGIdBh515qnxxI6jzrYCDwDnAXvUxR5Mcj5gA7AR+D7w+sz8S0iOSrLDnXcnGZb8nfT19HQ9nyQZCr2BpFvpHEB17Xp8KOtPY95HMiT3mXQ903ey7YeTnE/aTNJVdQ0wrS5mNQOPOvsQsDzdjmdIhmCfXrc9p5EcHW4iGar7enYc/bTDukhGwv2Q5PqRTSQn5C8E9krn/xNJctyQbkM38I4B2nsESSJ6Ov2sTyRJ5Ct29v6n5fVt3oXkPNKTwDrga8BnGdyos8eBd5AMaNhCMgz+nXVx/b7/JEO67yHZZ9eSDJOfmJlf29cGGnV2cPqZPMkgRy16ajwpfUPNCqXkAsoHgQ9HxPeLbY0VJb148uSIeFXRbbHh43M0ZmaWKycaMzPLlbvOzMwsVz6iMTOzXHl4M/CqV70qpk+fXnQzxoxNmzYxZcqUopthtgPvm8Prrrvuejwi9h4ozokGmD59OitWrCi6GWNGpVKhpaWl6GaY7cD75vCS9NBg4tx1ZmZmuXKiMTOzXDnRmJlZrnJLNJJeJ+kSSXdL6lWDx92mj2ONuunRBnEz9PyjdR+WdG767IlsjCSdmT669WlJP5H0xry2z8zMBifPwQCHkTxm9k52/qzwa0jug1Sz3Y0OJU0lud/QfSR3f30tcAFJkvxiJvR04CySu9SuIrmv0q2SmuP5G0CamdkIy7Pr7MaIOCAijiW5e2t/HomIOzPTL+vmf4rkFuYfjYhbImIJyY0WPytpTwBJu5Ikmi9HxOJIHjh1LOlN8oZ7w6yxcrlMc3Mzs2fPprm5mXK5XHSTzGwUyO2IJvq/rftQzQFujsyjdUlu/30eyS3Xa7dX3xO4LrP+TZJuTJfPHvlYDsrlMh0dHXR1ddHb20tTUxPt7e0AtLW1Fdw6MyvSaBgM0C5pq6QnJV3f4AmDh5J0hfWJiDUkt3A/NBPTS3Jr86wq/T/oyYZRZ2cnXV1dtLa2MnHiRFpbW+nq6qKzs7PopplZwYq+YPO7
JOdw1gIlkocW/VTSGyLiyTRmKsnzNOqtS+fVYjZG8oS9+pjJkiZF3UOuJM0H5gNMmzaNSqUyDJszflWrVXp7e6lUKmzcuJFKpUJvby/VatXvrY0atX3TRlahiSYiTs28/Kmk20kednQig3ua3otZ96XApQAzZ84MXy384pRKJZqammhpaem7+rq7u5tSqeQrsW3U8J0BijEaus76RMRKkqcQvilTvA7Yq0H41HReLWb3+iHPaczm+qMZG34dHR20t7fT3d3Ntm3b6O7upr29nY6OjqKbZmYFK7rrrJH65xasou48i6QDgMk8f+5mFdAEvI4kUdXscH7H8tHW1sbtt9/OnDlz2LJlC7vssgsnnXSSBwKY2eg6opHUTJIc7soULwOOkrRHpuw4kuea35a+vp3k2enHZuqaDHw4Xd5yVi6Xuemmm1i2bBm33HILy5Yt46abbvIQZzPL9c4AkyUdI+kYYD9g79rrdN4HJV0t6ROSWiR9GrgZWANckalqCbAFuEHSe9KT+AuBC2tDniPiGWARcKakv5c0G/h2un3Zi0EtJx51Zmb9ybPr7NUk/+yzaq9fA/wO2IckEbwceAL4IXBm9pqZiFiXJo7FJNfMrAcuIkk2WYtIEssZwCuBFcB7I+IPw7dJ1p9qtcqsWbO2K5s1axbVarWgFpnZaJHnBZurAQ0QNnuQdd0HvHuAmAA608lGWKlUoqenh9bW1r6ynp4eSqVSga0ys9FgVJ2jsZcujzozs/6MxlFn9hJUG122YMECqtUqpVKJzs5OjzozMycaGz5tbW20tbX5ojgz2467zszMLFdONGZmlisnGjMzy5UTjZmZ5cqJxszMcuVEY2ZmuXKiMTOzXDnRmJlZrpxozMwsV040ZmaWKycaMzPLlRONmZnlyonGzMxy5URjZma5cqIxM7NcOdGYmVmunGjMzCxXTjRmZpYrJxozM8uVE42ZmeXKicbMzHLlRGNmZrlyojEzs1w50ZiZWa5ySzSSXifpEkl3S+qVVGkQI0lnSvqdpKcl/UTSGxvEzZC0XNJmSQ9LOldS0wupy8zMRlaeRzSHAR8AfgPc30/M6cBZwHnAh4GNwK2S9qkFSJoK3AoE8BHgXOBzwDlDrcvMzEZenonmxog4ICKOBe6tnylpV5Lk8OWIWBwRtwLHkiSUkzOhnwJ2Az4aEbdExBKSJPNZSXsOsS4zMxthuSWaiHhugJC3AXsC12WW2QTcCMzJxM0Bbo6IpzJl3yJJPu8aYl1mZjbCihwMcCjQCzxQV15N52XjVmUDImINsDkTN9i6zMxshE0scN1TgY0R0VtXvg6YLGlSRGxN49Y3WH5dOm8odfWRNB+YDzBt2jQqlcqL2hh73saNG/1+2qjkfbMYRSaaQkXEpcClADNnzoyWlpZiGzSGVCoV/H7aaOR9sxhFdp2tA3avH6ZMcnSyOXMEsg7Yq8HyU9N5Q6nLzMxGWJGJZhXQBLyurrz+nMwq6s6zSDoAmJyJG2xdZmY2wopMNLcDT5EMQwZA0mSSa2CWZeKWAUdJ2iNTdhzwNHDbEOsyM7MRlts5mvQf/QfSl/sBe0o6Jn39g4jYLGkRcJakdSRHHp8lSX5fy1S1BDgFuEHSecDBwELgwtqQ54h4ZpB1mZnZCMtzMMCrgW/XldVevwZYDSwiSQZnAK8EVgDvjYg/1BaIiHWSZgOLSa6LWQ9cRJJssgasy8zMRl5uiSYiVgMaICaAznTaWdx9wLuHoy4zMxtZvnuzmZnlyonGzMxy5URjZma5cqIxM7NcOdGYmVmunGjMzCxXTjRmZpYrJxozM8uVE42ZmeXKicbMzHLlRGNmZrlyorFhUy6XaW5uZvbs2TQ3N1Mul4tukpmNAuP2Uc42vMrlMh0dHXR1ddHb20tTUxPt7e0AtLW1Fdw6MyuSj2hsWHR2dtLV1UVraysTJ06ktbWVrq4uOjt9M22z8c6JxoZFtVpl1qxZ25XNmjWLarVaUIvMbLRworFh
USqV6Onp2a6sp6eHUqlUUIvMbLRworFh0dHRQXt7O93d3Wzbto3u7m7a29vp6OgoumlmVjAPBrBhUTvhv2DBAqrVKqVSic7OTg8EMDMf0ZiZWb58RGPDwsObzaw/TjQ2LDo7O5k7d+52XWdz585195mZOdHY8LjvvvvYtGkT3/jGN/qOaObNm8dDDz1UdNPMrGA+R2PDYtKkSSxYsGC7CzYXLFjApEmTim6amRXMRzQ2LLZu3crixYs5/PDD6e3tpbu7m8WLF7N169aim2ZmBXOisWExY8YMjj766B3O0SxdurTopplZwZxobFh0dHQ0HHXme52ZWaGJRtKJwL81mPXpiFiSxgg4A/g08CrgF8ApEfGrurpmAF8D3gqsBy4HzomI3tw2wPr4gk0z689oOaJ5N/B05vVvM7+fDpwFfB5YBXwWuFVSc0Q8CiBpKnArcB/wEeC1wAUkgx2+mHvrDUiSTVtbG5VKhZaWlqKbY2ajxGhJNL+IiI31hZJ2JUk0X46IxWnZHcBq4GSeTyKfAnYDPhoRTwG3SNoTWCjp/LTMzMwKMNqHN78N2BO4rlYQEZuAG4E5mbg5wM11CeVbJMnnXSPQTjMz68doSTT/I2mbpN9I+r+Z8kOBXuCBuvhqOi8btyobEBFrgM11cWZmNsKK7jp7hOT8y38CTcAngCWSJkfERcBUYGODE/rrgMmSJkXE1jRufYP616XzdiBpPjAfYNq0aVQqlWHYnPFt+fLlfPOb32TNmjUceOCBHH/88cyePbvoZpn12bhxo//WC1BooomIm4GbM0XL0vMyHZIuznndlwKXAsycOTN88vrFKZfLXH311dvdgqa9vZ0ZM2Z45JmNGh6oUozR0nWWdT3wSuAgkiOS3SU11cVMBTanRzOkcXs1qGtqOs9y1tnZSVdX13a3oOnq6vJ1NGY2KhNNZH5fRdKl9rq6mPpzMquoOxcj6QBgcl2c5aRarTJr1qztymbNmkW1Wi2oRWY2WozGRHMM8ATwEHA78BRwbG2mpMnAh4FlmWWWAUdJ2iNTdhzJtTm35d1gg1KpRE9Pz3ZlPT09lEqlglpkZqNF0XcGuB64E1iZtuW4dDolIp4DnpG0CDhL0jqev2BzAsldAGqWAKcAN0g6DzgYWAhc6GtoRkZHRwft7e19t6Dp7u72LWjMDCh+1Nn9wEnAAYBIruz/24j490zMIpLEcgbJuZsVwHsj4g+1gIhYJ2k2sJjkGpv1wEUkycZGgG9BY2b9UUQMHDXGzZw5M1asWFF0M8YMj+yx0cr75vCSdFdEzBwobjSeozEzszHEicbMxrxyuUxzczOzZ8+mubmZcrlcdJPGlaLP0ZiZ5apcLjd8VhLgc4gjxEc0Zjam+WLi4jnRmNmY5ouJi+dEY2Zjmi8mLp7P0dgLkjxhe+g8nN5Gmi8mLp4Tjb0gO0sY00+/idWLPjiCrTHrny8mLp4TjZmNeW1tbbS1tfmCzYL4HI2ZmeXKicbMzHLlRGNmZrlyojEzs1w50ZiZWa6caMzMLFdONGZmlisnGjMzy5UTjZmZ5cqJxszGPD/4rFi+BY2ZjWl+8Fnx5LvpwsyZM2PFihVFN2NU+otzfsSTTz+b6zr22u1l/Prs9+W6Dhu/mpub+drXvkZra2vfvc66u7tZsGABK1euLLp5L2mS7oqImQPF+YjGdurJp58d8p2Yh3rjwumn3zTEVpkNnh98VjwnGjMb00qlEh//+MdZtmwZW7ZsYZdddmHOnDl+8NkI8mAAMxvT9ttvP5YuXcq8efO48cYbmTdvHkuXLmW//fYrumnjho9ozGxMu+222zjkkENYsmQJX//615HEIYccwm233VZ008YNJxozG9O2bNnC/fffT1NTE729vUyYMIH777+/6GaNK+46M7Nx4fzzz2fZsmWcf/75RTdl3BkziUbSDEnLJW2W9LCkcyU1Fd0uMyvehAkTdvra8jUmus4kTQVuBe4DPgK8FriAJJF+scCmmdko
cOSRR3LmmWf2jTo78sgjueOOO4pu1rgxJi7YlHQGcBpwUEQ8lZadBiwE9qmV9ccXbPbvDVe+YUTWc88J94zIemx8kDTkZcbC/8KRNt4u2JwD3FyXUL4FnAe8C7ixkFaNARuqi3zBpo1KO7trxUFf+H7f73+6ZQkbfnkTSBDPgSZABHu86YO84r2f6otrtB/6rhXDY6wkmkOBH2cLImKNpM3pPCeaF+EFJYIfDn6ZvXZ72dDrt3HvuemfY49BxO1RAjiswZzVwOk7XwcAPtp+scZKopkKrG9Qvi6dtwNJ84H5ANOmTaNSqeTWuJeyK94/pWF5a2vrC6qvu7u7YbnffxuqDdVFua9jysu8bw6HsZJohiwiLgUuheQczVC6emzn/dlD7TozeyFWtww+tlwu09nZyb33VTlsRomOjg7fuXkEjZVEsw7Yq0H51HSemY0jOxsMcO+99zJ37lzmzp27XbkHA+RnrCSaVSTnYvpIOgCYnM4zs3EkmzQOOOAAtm3bxjXXXNP3PJq5c+cyceJEfve73xXYyvFjrFy1tAw4SlL23OBxwNOAb2hkNo6tXbuWq666itbWViZOnEhraytXXXUVa9euLbpp48ZYSTRLgC3ADZLek57oXwhcONA1NGZmlq8xkWgiYh0wG2giGcp8DnARcHaR7TKz4u2///6ccMIJdHd3s23bNrq7uznhhBPYf//9i27auDFWztEQEfcB7y66HWY2upx//vmceuqpzJs3jzVr1nDggQeybds2LrjggqKbNm6MiSMaM7P+tLW1cfHFFzNlSnJN2JQpU7j44os9vHkEjZkjGjOz/rS1tdHW1uZrvAriIxozM8uVE42ZmeXKicbMzHLlRGNmZrlyojEzs1w50ZiZWa6caMzMLFdONGY25pXLZZqbm5k9ezbNzc2Uy+WimzSu+IJNMxvTyuUyHR0ddHV19T0moL29HcB3BxghPqIxszGts7OTrq6u7R4T0NXVRWdnZ9FNGzecaMxsTKtWq8yaNWu7slmzZlGtVgtq0fjjRGNmY1qpVKKnp2e7sp6eHkqlUkEtGn98jsbMxrSOjg6OO+44pkyZ0veYgE2bNnHxxRcX3bRxw0c0ZjZuRETRTRiXnGjMbEzr7Ozk2muv5cEHH+THP/4xDz74INdee60HA4wgJxozG9M8GKB4TjRmNqZ5MEDxnGjMbEzr6Oigvb2d7u5utm3bRnd3N+3t7XR0dBTdtHHDo87MbEyrXf2/YMECqtUqpVKJzs5O3xVgBDnRmNmY19bWRltbG5VKhZaWlqKbM+6468zMzHLlRGNmZrlyojEzs1w50ZjZmOfn0RSrsMEAkirAuxrM2i0insnE7QcsBt4DbAG+BZwWEZvr6jsJOA04ALg3jVmeT+vN7KXCz6MpXtFHNN3AW+umLbWZkl4G3AwcBHwCOBU4Frg0W4mkNmAJcBUwhyTRfF9Sc/6bYGajmZ9HU7yiE82fIuLOuil717tjgBLwsYi4KSKuBhYAcyW9PhO3ELgyIr4UEd3AicB/A6ePzGYYuHvCRiffgqZ4o/06mjnALyLiwUzZUmAr8H7gAUkHA4eQHO0AEBHPSfp2tszy5e4JG61qt6BpbW3tK/MtaEZW0Uc075O0OZ1ulvTndfMPBVZlCyJiK/A/6TwyP7eLA6rAKyTtPdyNth25e8JGK9+CpnhFHtHcBlxJ0sV1ENAB/FTSX0TE6jRmKrC+wbLr0nlkftbHrcvMf6y+AknzgfkA06ZNo1KpvKCNsES1WqW3t5dKpcLGjRupVCr09vZSrVb93lqh9t13Xz75yU8yb968vgefHX/88ey7777eN0fIsCUaSXsB+w4UFxGr0p9nZ4p/KulWkqOSU4HPDFe7dtKOS0kHFcycOTN8W4oXp1Qq0dTUREtLS99tPrq7uymVSr7lhxWupaWFL33pS74FTUGG84jmWOCyQcSpUWFEPCrpZ8CbMsXrgL0ahE8Ffp2JIY1bXxeTnW85qnVP1M7R
1Lon3HVmZsOWaCLicuDyF1tN3etVPH8OBgBJk4CDSYYz12JI4x7KhB5KMqpth24zG36+Q66Z9afowQB9JO0DzALuyhQvA46QdFCm7K+AXYAfAkTEb4H7SY6oanVNSF8vy7nZltHW1sbKlStZvnw5K1eudJIxM6CgwQDp6LJO4DpgLXAgcAbwHPDVTOj1JIMEbpB0Fkn32EXANRHxQCZuIfBNSauBnwEnAK8H5ua6IWZmNqCiRp09ATQB5wOvBDYAFeDoiFhTC4qIZyW9n+QWNNfx/C1oPp+tLCLKknYHvgCcRXJngA9FxMr8N8XMzHamkEQTEb8HPjDI2LXA0YOIu4zBDUYwM7MRNGrO0ZiZ2djkRGNmZrlyojEzs1w50ZiZWa6caMzMLFdONGZmlisnGjMzy5UTjZmZ5cqJxszMcuVEY2ZmuXKiMTOzXDnRmJlZrpxozMwsV040ZmaWKycaMzPLlRONmZnlyonGzMxy5URjZma5cqIxM7NcOdGYmVmunGjMzCxXTjRmZpYrJxozM8uVE42ZmeXKicbMzHLlRGNmZrlyojEzs1zlkmgkHSfpBkmPSApJJ/YTt5+k/5C0QdLjkhZLmtwg7iRJD0h6RtJdkma/0LrMzGxk5XVEcwwwHfh+fwGSXgbcDBwEfAI4FTgWuLQurg1YAlwFzAHuBb4vqXmodZmZ2cjLK9EcFxFvAj6zk5hjgBLwsYi4KSKuBhYAcyW9PhO3ELgyIr4UEd3AicB/A6e/gLosR+VymebmZmbPnk1zczPlcrnoJpnZKDAxj0oj4rlBhM0BfhERD2bKlgJbgfcDD0g6GDiE5Ailr25J386WDaauF7QhNmjlcpmOjg66urro7e2lqamJ9vZ2ANra2gpunZkVqcjBAIcCq7IFEbEV+J90Hpmf28UBVeAVkvYeQl2Wo87OTrq6umhtbWXixIm0trbS1dVFZ2dn0U0zs4LlckQzSFOB9Q3K16XzyPysj1uXmf/YIOvajqT5wHyAadOmUalUBttua6BardLb20ulUmHjxo1UKhV6e3sz29QPAAAGuElEQVSpVqt+b23UqO2bNrIGlWgk7QXsO1BcRNQfeYxaEXEp6WCBmTNnRktLS7ENeokrlUo0NTXR0tJCpVKhpaWF7u5uSqUSfm9ttKjtmzayBntEcyxw2SDiNIR1rwP2alA+Ffh1JoY0bn1dTHb+YOqyHHV0dNDe3t53jqa7u5v29nZ3nZnZ4BJNRFwOXD7M615F3fkTSZOAg0mGM9diSOMeyoQeCvwpIh4bQl2Wo9oJ/wULFlCtVimVSnR2dnoggJkVOhhgGXCEpIMyZX8F7AL8ECAifgvcT3JEBYCkCenrZUOpy/LX1tbGypUrWb58OStXrnSSMTMgp8EAkmYAM4Bd06KZkjYCj0XEbWnZ9UAHcIOks0i6vi4CromI7HDkhcA3Ja0GfgacALwemJuJGWxdZmY2wvIadfZx4OzM679Pp9uAFoCIeFbS+4HFwHXAFuBbwOezFUVEWdLuwBeAs0juDPChiFiZiRlUXWZmNvLyumBzIcmRyEBxa4GjBxF3GQMMRhhsXWZmNrJ892YzM8uVE42ZmeVKEVF0Gwon6TG2Hz5tL86rgMeLboRZA943h9dBEbH3QEFONDbsJK2IiJlFt8OsnvfNYrjrzMzMcuVEY2ZmuXKisTz4yaY2WnnfLIDP0ZiZWa58RGNmZrlyojEzs1w50RgAkk6UdJekDZLWSfovSRe+gHpWS/pKHm208UnSQkmRmR6W9B1Jrx2m+kuSfippU1r/dEkVSddnYt4n6R+GY33jkRONIekMkucN3Qx8FPhb4Lskj1owGw2eBN6aTv8PeCOwXNKUYaj7n4CXk+zvbwUeAf4OOCMT8z7AieYFyuvuzfbScjJwSUScmSm7UdI5RTXIrM62iLgz/f1OSQ8BPcAckseE9JHUBDRFxNZB1n0o8L2IWJ4pu+/FNtie5yMag+Tb3KP1hVE3JFHSIkn3
SNooaa2kqyXtM1Dlkt4h6TZJmyU9IekySXtk5r9c0uVpl8gzktZIGsyjw238+mX68zWSrpC0QtLRku4FngGOBJD0RknL031vXbrPTkvnTZcUwGuBz6TdZpV0Xl/XmaSFwOeAgzLdd1eM5Ma+1PmIxiD5o10gaQ3w/Yh4op+4fYDzgLUk94z6HPBjSc0R8VyjBSS9HbgVWAocA7wSWARMTV8DXAi8DfgMScI7AHjnMGyXjV3T05+PAoelr88Hzk3LHpS0N1ABqiQPStydZN+7RdJMki6ytwL/AfwY+BrwVIN1XU7ysMV3A3+dlj3WIM764URjkDyUbilwBRCSqsB3gK9ERN8fXkScWPs97Z64gyTpzAJ+0k/di4DbI+K4zLK/J+lfb04fYPcW4F8i4trMct8chu2yMURS7f/VwcDXgQ3AcmA2yReY90TErzLxi9Jfj6rtx5IeAO4EPhYRZZJuuC3AI5muue1ExFpJjwBb+ouxnXPXmRERdwMlkpOh/wqI5GmmK9KnmwIgaY6k2yU9CWwjSTIAhzSqV9Jkkm+M10maWJtI+tafBd6chv4K+Lykv5PUsC4b915Jss88C/wGeA1wXEQ8nM7/fTbJpN4C/Kjuy9LPgdUkX45shDjRGAARsSUiboyIkyNiBvB/SLoL2gEkHQF8jyS5/A1JAvnLdPFd+6l2KtBEkryezUxbgJeRdJFBMhhhKfCPwG8kPSDpE8O7hfYS9yRwBDAT2B+YHhHLMvP/0GCZffsp/wPwimFvofXLXWfWUER0STqfZEQOJH3Tj5F8iwwASQcNUM16IEge6/2DBvMfTte1HjgFOEXSnwOnAVdLujsiPPrHIBl1tmIn8xvdS+sR4NUNyqcBdw1Lq2xQfERjSNrhjzE9kboXz38j3A14tm4k2id3Vm9EbCLpD/+ziFjRYHq4wTJ3A58n2TcPrZ9vNgQ/B46qG+F4BMnAgZ4h1rWV/o/cbQA+ojGAeyR9F/gR8EfgIJKL4jYDV6YxtwD/IOmrwI0ko8SOH0Tdp5Gc+H+O5HqHDcCBwAeBjoi4X1IPyciflSTfTE8CNgH/OTybZ+PUhcCngZslncfzo87uIRnsMhSrgGmSTiTZTx+PiNXD19SxzYnGIBkS+hHgn0n6rh8FbifpJnsQICJ+IOkLwAKSRHAH8CHg/p1VHBE9kt4JnAP8O8k5m4eAH/L80dIdwIkk3zR7gf8C5kTE2vr6zAYrIh6T1ApcAJRJjkp+AHxmCBdz1lwHtJIMod6b5AvYicPX2rHNjwkwM7Nc+RyNmZnlyonGzMxy5URjZma5cqIxM7NcOdGYmVmunGjMzCxXTjRmZpYrJxozM8vV/wcvNj1K2GvvUgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x25bed1f3cf8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of df_sample; the title is set on the Axes returned by plot.box(),\n",
    "# then both axes get 15-pt tick labels and a grid.\n",
    "df_sample.plot.box().set_title(\"Boxplot of sales and profit\", fontsize=15)\n",
    "plt.tick_params(axis='both', labelsize=15)\n",
    "plt.grid(True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 12: Concatenation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw three independent random samples of four rows each (same four columns)\n",
    "# to serve as inputs for the concatenation examples.\n",
    "df_1, df_2, df_3 = (\n",
    "    df[['Customer Name','State','Sales','Profit']].sample(n=4) for _ in range(3)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1606</th>\n",
       "      <td>Jas O'Carroll</td>\n",
       "      <td>California</td>\n",
       "      <td>73.200</td>\n",
       "      <td>21.2280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9404</th>\n",
       "      <td>Dave Poirier</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>7.920</td>\n",
       "      <td>1.6830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>Jamie Frazer</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>4.224</td>\n",
       "      <td>0.4752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4627</th>\n",
       "      <td>Cyma Kinney</td>\n",
       "      <td>New York</td>\n",
       "      <td>24.880</td>\n",
       "      <td>6.9664</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Customer Name         State   Sales   Profit\n",
       "1606  Jas O'Carroll    California  73.200  21.2280\n",
       "9404   Dave Poirier  Pennsylvania   7.920   1.6830\n",
       "2008   Jamie Frazer     Tennessee   4.224   0.4752\n",
       "4627    Cyma Kinney      New York  24.880   6.9664"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1680</th>\n",
       "      <td>Sean Braxton</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>42.368</td>\n",
       "      <td>8.4736</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4930</th>\n",
       "      <td>Katherine Ducich</td>\n",
       "      <td>New York</td>\n",
       "      <td>14.940</td>\n",
       "      <td>7.0218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8550</th>\n",
       "      <td>Victoria Brennan</td>\n",
       "      <td>Texas</td>\n",
       "      <td>6.264</td>\n",
       "      <td>2.0358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8559</th>\n",
       "      <td>Laura Armstrong</td>\n",
       "      <td>Texas</td>\n",
       "      <td>287.880</td>\n",
       "      <td>35.9850</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Customer Name         State    Sales   Profit\n",
       "1680      Sean Braxton  Pennsylvania   42.368   8.4736\n",
       "4930  Katherine Ducich      New York   14.940   7.0218\n",
       "8550  Victoria Brennan         Texas    6.264   2.0358\n",
       "8559   Laura Armstrong         Texas  287.880  35.9850"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8890</th>\n",
       "      <td>Frank Carlisle</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>1927.59</td>\n",
       "      <td>751.7601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5620</th>\n",
       "      <td>Steve Chapman</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>3.04</td>\n",
       "      <td>1.0336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8139</th>\n",
       "      <td>Neil Knudson</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>284.08</td>\n",
       "      <td>24.8570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>764</th>\n",
       "      <td>Chris Selesnick</td>\n",
       "      <td>Louisiana</td>\n",
       "      <td>50.94</td>\n",
       "      <td>25.4700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Customer Name         State    Sales    Profit\n",
       "8890   Frank Carlisle      Missouri  1927.59  751.7601\n",
       "5620    Steve Chapman     Wisconsin     3.04    1.0336\n",
       "8139     Neil Knudson  Pennsylvania   284.08   24.8570\n",
       "764   Chris Selesnick     Louisiana    50.94   25.4700"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1606</th>\n",
       "      <td>Jas O'Carroll</td>\n",
       "      <td>California</td>\n",
       "      <td>73.200</td>\n",
       "      <td>21.2280</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9404</th>\n",
       "      <td>Dave Poirier</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>7.920</td>\n",
       "      <td>1.6830</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>Jamie Frazer</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>4.224</td>\n",
       "      <td>0.4752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4627</th>\n",
       "      <td>Cyma Kinney</td>\n",
       "      <td>New York</td>\n",
       "      <td>24.880</td>\n",
       "      <td>6.9664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1680</th>\n",
       "      <td>Sean Braxton</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>42.368</td>\n",
       "      <td>8.4736</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4930</th>\n",
       "      <td>Katherine Ducich</td>\n",
       "      <td>New York</td>\n",
       "      <td>14.940</td>\n",
       "      <td>7.0218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8550</th>\n",
       "      <td>Victoria Brennan</td>\n",
       "      <td>Texas</td>\n",
       "      <td>6.264</td>\n",
       "      <td>2.0358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8559</th>\n",
       "      <td>Laura Armstrong</td>\n",
       "      <td>Texas</td>\n",
       "      <td>287.880</td>\n",
       "      <td>35.9850</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8890</th>\n",
       "      <td>Frank Carlisle</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>1927.590</td>\n",
       "      <td>751.7601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5620</th>\n",
       "      <td>Steve Chapman</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>3.040</td>\n",
       "      <td>1.0336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8139</th>\n",
       "      <td>Neil Knudson</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>284.080</td>\n",
       "      <td>24.8570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>764</th>\n",
       "      <td>Chris Selesnick</td>\n",
       "      <td>Louisiana</td>\n",
       "      <td>50.940</td>\n",
       "      <td>25.4700</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Customer Name         State     Sales    Profit\n",
       "1606     Jas O'Carroll    California    73.200   21.2280\n",
       "9404      Dave Poirier  Pennsylvania     7.920    1.6830\n",
       "2008      Jamie Frazer     Tennessee     4.224    0.4752\n",
       "4627       Cyma Kinney      New York    24.880    6.9664\n",
       "1680      Sean Braxton  Pennsylvania    42.368    8.4736\n",
       "4930  Katherine Ducich      New York    14.940    7.0218\n",
       "8550  Victoria Brennan         Texas     6.264    2.0358\n",
       "8559   Laura Armstrong         Texas   287.880   35.9850\n",
       "8890    Frank Carlisle      Missouri  1927.590  751.7601\n",
       "5620     Steve Chapman     Wisconsin     3.040    1.0336\n",
       "8139      Neil Knudson  Pennsylvania   284.080   24.8570\n",
       "764    Chris Selesnick     Louisiana    50.940   25.4700"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stack the three samples on top of each other (row-wise); each row keeps its original index label\n",
    "df_cat1 = pd.concat(objs=[df_1, df_2, df_3], axis='index')\n",
    "df_cat1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>764</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Chris Selesnick</td>\n",
       "      <td>Louisiana</td>\n",
       "      <td>50.94</td>\n",
       "      <td>25.4700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1606</th>\n",
       "      <td>Jas O'Carroll</td>\n",
       "      <td>California</td>\n",
       "      <td>73.200</td>\n",
       "      <td>21.2280</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1680</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Sean Braxton</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>42.368</td>\n",
       "      <td>8.4736</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>Jamie Frazer</td>\n",
       "      <td>Tennessee</td>\n",
       "      <td>4.224</td>\n",
       "      <td>0.4752</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4627</th>\n",
       "      <td>Cyma Kinney</td>\n",
       "      <td>New York</td>\n",
       "      <td>24.880</td>\n",
       "      <td>6.9664</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4930</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Katherine Ducich</td>\n",
       "      <td>New York</td>\n",
       "      <td>14.940</td>\n",
       "      <td>7.0218</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5620</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Steve Chapman</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>3.04</td>\n",
       "      <td>1.0336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8139</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Neil Knudson</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>284.08</td>\n",
       "      <td>24.8570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8550</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Victoria Brennan</td>\n",
       "      <td>Texas</td>\n",
       "      <td>6.264</td>\n",
       "      <td>2.0358</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8559</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Laura Armstrong</td>\n",
       "      <td>Texas</td>\n",
       "      <td>287.880</td>\n",
       "      <td>35.9850</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8890</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Frank Carlisle</td>\n",
       "      <td>Missouri</td>\n",
       "      <td>1927.59</td>\n",
       "      <td>751.7601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9404</th>\n",
       "      <td>Dave Poirier</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>7.920</td>\n",
       "      <td>1.6830</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      Customer Name         State   Sales   Profit     Customer Name  \\\n",
       "764             NaN           NaN     NaN      NaN               NaN   \n",
       "1606  Jas O'Carroll    California  73.200  21.2280               NaN   \n",
       "1680            NaN           NaN     NaN      NaN      Sean Braxton   \n",
       "2008   Jamie Frazer     Tennessee   4.224   0.4752               NaN   \n",
       "4627    Cyma Kinney      New York  24.880   6.9664               NaN   \n",
       "4930            NaN           NaN     NaN      NaN  Katherine Ducich   \n",
       "5620            NaN           NaN     NaN      NaN               NaN   \n",
       "8139            NaN           NaN     NaN      NaN               NaN   \n",
       "8550            NaN           NaN     NaN      NaN  Victoria Brennan   \n",
       "8559            NaN           NaN     NaN      NaN   Laura Armstrong   \n",
       "8890            NaN           NaN     NaN      NaN               NaN   \n",
       "9404   Dave Poirier  Pennsylvania   7.920   1.6830               NaN   \n",
       "\n",
       "             State    Sales   Profit    Customer Name         State    Sales  \\\n",
       "764            NaN      NaN      NaN  Chris Selesnick     Louisiana    50.94   \n",
       "1606           NaN      NaN      NaN              NaN           NaN      NaN   \n",
       "1680  Pennsylvania   42.368   8.4736              NaN           NaN      NaN   \n",
       "2008           NaN      NaN      NaN              NaN           NaN      NaN   \n",
       "4627           NaN      NaN      NaN              NaN           NaN      NaN   \n",
       "4930      New York   14.940   7.0218              NaN           NaN      NaN   \n",
       "5620           NaN      NaN      NaN    Steve Chapman     Wisconsin     3.04   \n",
       "8139           NaN      NaN      NaN     Neil Knudson  Pennsylvania   284.08   \n",
       "8550         Texas    6.264   2.0358              NaN           NaN      NaN   \n",
       "8559         Texas  287.880  35.9850              NaN           NaN      NaN   \n",
       "8890           NaN      NaN      NaN   Frank Carlisle      Missouri  1927.59   \n",
       "9404           NaN      NaN      NaN              NaN           NaN      NaN   \n",
       "\n",
       "        Profit  \n",
       "764    25.4700  \n",
       "1606       NaN  \n",
       "1680       NaN  \n",
       "2008       NaN  \n",
       "4627       NaN  \n",
       "4930       NaN  \n",
       "5620    1.0336  \n",
       "8139   24.8570  \n",
       "8550       NaN  \n",
       "8559       NaN  \n",
       "8890  751.7601  \n",
       "9404       NaN  "
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column-wise concatenation: the frames are placed side by side and rows are\n",
    "# aligned on the index, so labels absent from a frame show up as NaN there.\n",
    "df_cat2 = pd.concat([df_1, df_2, df_3], axis='columns')\n",
    "df_cat2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 13: Merging by a common key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode\n",
       "0      Claire Gute 2016-11-11    Second Class\n",
       "1      Claire Gute 2016-11-11    Second Class\n",
       "2  Darrin Van Huff 2016-06-16    Second Class\n",
       "3   Sean O'Donnell 2015-10-18  Standard Class"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-hand table for the merge examples: shipping details of the first four rows.\n",
    "shipping_cols = ['Customer Name', 'Ship Date', 'Ship Mode']\n",
    "df_1 = df[shipping_cols].iloc[:4]\n",
    "df_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name                                       Product Name  \\\n",
       "0      Claire Gute                  Bush Somerset Collection Bookcase   \n",
       "1      Claire Gute  Hon Deluxe Fabric Upholstered Stacking Chairs,...   \n",
       "2  Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...   \n",
       "3   Sean O'Donnell      Bretford CR4500 Series Slim Rectangular Table   \n",
       "\n",
       "   Quantity  \n",
       "0         2  \n",
       "1         3  \n",
       "2         2  \n",
       "3         5  "
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Right-hand table: product details for the same first four rows.\n",
    "product_cols = ['Customer Name', 'Product Name', 'Quantity']\n",
    "df_2 = df[product_cols].iloc[:4]\n",
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0      Claire Gute 2016-11-11    Second Class   \n",
       "1      Claire Gute 2016-11-11    Second Class   \n",
       "2      Claire Gute 2016-11-11    Second Class   \n",
       "3      Claire Gute 2016-11-11    Second Class   \n",
       "4  Darrin Van Huff 2016-06-16    Second Class   \n",
       "5   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0                  Bush Somerset Collection Bookcase         2  \n",
       "1  Hon Deluxe Fabric Upholstered Stacking Chairs,...         3  \n",
       "2                  Bush Somerset Collection Bookcase         2  \n",
       "3  Hon Deluxe Fabric Upholstered Stacking Chairs,...         3  \n",
       "4  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "5      Bretford CR4500 Series Slim Rectangular Table         5  "
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner merge on the shared key. 'Claire Gute' appears twice in both frames,\n",
    "# so every left row pairs with every right row and the result has repeated rows.\n",
    "df_1.merge(df_2, how='inner', on='Customer Name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Bush Somerset Collection Bookcase</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Hon Deluxe Fabric Upholstered Stacking Chairs,...</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0      Claire Gute 2016-11-11    Second Class   \n",
       "1      Claire Gute 2016-11-11    Second Class   \n",
       "4  Darrin Van Huff 2016-06-16    Second Class   \n",
       "5   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0                  Bush Somerset Collection Bookcase         2  \n",
       "1  Hon Deluxe Fabric Upholstered Stacking Chairs,...         3  \n",
       "4  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "5      Bretford CR4500 Series Slim Rectangular Table         5  "
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same inner merge, followed by removal of the fully identical rows it produced.\n",
    "df_1.merge(df_2, how='inner', on='Customer Name').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name                                       Product Name  \\\n",
       "2  Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...   \n",
       "3   Sean O'Donnell      Bretford CR4500 Series Slim Rectangular Table   \n",
       "4   Sean O'Donnell                     Eldon Fold 'N Roll Cart System   \n",
       "5  Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...   \n",
       "\n",
       "   Quantity  \n",
       "2         2  \n",
       "3         5  \n",
       "4         2  \n",
       "5         7  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A second product table taken from rows 2-5, so it only partly overlaps df_1.\n",
    "product_cols = ['Customer Name', 'Product Name', 'Quantity']\n",
    "df_3 = df[product_cols].iloc[2:6]\n",
    "df_3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0  Darrin Van Huff 2016-06-16    Second Class   \n",
       "1   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "2   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "1      Bretford CR4500 Series Slim Rectangular Table         5  \n",
       "2                     Eldon Fold 'N Roll Cart System         2  "
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner merge keeps only the customers present in both df_1 and df_3.\n",
    "df_1.merge(df_3, how='inner', on='Customer Name').drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Darrin Van Huff</td>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sean O'Donnell</td>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Brosina Hoffman</td>\n",
       "      <td>NaT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     Customer Name  Ship Date       Ship Mode  \\\n",
       "0      Claire Gute 2016-11-11    Second Class   \n",
       "2  Darrin Van Huff 2016-06-16    Second Class   \n",
       "3   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "4   Sean O'Donnell 2015-10-18  Standard Class   \n",
       "5  Brosina Hoffman        NaT             NaN   \n",
       "\n",
       "                                        Product Name  Quantity  \n",
       "0                                                NaN       NaN  \n",
       "2  Self-Adhesive Address Labels for Typewriters b...       2.0  \n",
       "3      Bretford CR4500 Series Slim Rectangular Table       5.0  \n",
       "4                     Eldon Fold 'N Roll Cart System       2.0  \n",
       "5  Eldon Expressions Wood and Plastic Desk Access...       7.0  "
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Outer merge keeps customers from either frame; columns with no match on the\n",
    "# other side are filled with NaN / NaT.\n",
    "df_1.merge(df_3, how='outer', on='Customer Name').drop_duplicates()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 14: Join method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode\n",
       "Customer Name                             \n",
       "Claire Gute     2016-11-11    Second Class\n",
       "Claire Gute     2016-11-11    Second Class\n",
       "Darrin Van Huff 2016-06-16    Second Class\n",
       "Sean O'Donnell  2015-10-18  Standard Class"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-hand frame for the join demo: the first four rows (by position),\n",
    "# indexed by customer name so that `join` can align on the index.\n",
    "# Built as one chain instead of calling set_index(inplace=True) on a chained-indexing slice.\n",
    "df_1 = (\n",
    "    df[['Customer Name', 'Ship Date', 'Ship Mode']]\n",
    "    .iloc[0:4]\n",
    "    .set_index('Customer Name')\n",
    ")\n",
    "df_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Brosina Hoffman</th>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                      Product Name  Quantity\n",
       "Customer Name                                                               \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...         2\n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table         5\n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System         2\n",
       "Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...         7"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Right-hand frame for the join demo: rows at positions 2-5, so it only partly\n",
    "# overlaps the rows 0-3 used for df_1. Indexed by customer name for index-based joining.\n",
    "# Built as one chain instead of calling set_index(inplace=True) on a chained-indexing slice.\n",
    "df_2 = (\n",
    "    df[['Customer Name', 'Product Name', 'Quantity']]\n",
    "    .iloc[2:6]\n",
    "    .set_index('Customer Name')\n",
    ")\n",
    "df_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Claire Gute     2016-11-11    Second Class   \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Claire Gute                                                    NaN       NaN  \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...       2.0  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table       5.0  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System       2.0  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left join on the 'Customer Name' index: every df_1 row is kept, and customers\n",
    "# absent from df_2 get NaN in df_2's columns. drop_duplicates() then removes rows\n",
    "# whose column values repeat (the index itself is not compared).\n",
    "(\n",
    "    df_1\n",
    "    .join(df_2, how='left')\n",
    "    .drop_duplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Brosina Hoffman</th>\n",
       "      <td>NaT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Brosina Hoffman        NaT             NaN   \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...         7  \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table         5  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System         2  "
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Right join on the 'Customer Name' index: every df_2 row is kept, and customers\n",
    "# absent from df_1 get NaT/NaN in df_1's columns. drop_duplicates() then removes rows\n",
    "# whose column values repeat (the index itself is not compared).\n",
    "(\n",
    "    df_1\n",
    "    .join(df_2, how='right')\n",
    "    .drop_duplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...         2  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table         5  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System         2  "
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner join on the 'Customer Name' index: only customers present in both df_1 and\n",
    "# df_2 survive. drop_duplicates() then removes rows whose column values repeat\n",
    "# (the index itself is not compared).\n",
    "(\n",
    "    df_1\n",
    "    .join(df_2, how='inner')\n",
    "    .drop_duplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Customer Name</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Brosina Hoffman</th>\n",
       "      <td>NaT</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Eldon Expressions Wood and Plastic Desk Access...</td>\n",
       "      <td>7.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Claire Gute</th>\n",
       "      <td>2016-11-11</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Darrin Van Huff</th>\n",
       "      <td>2016-06-16</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>Self-Adhesive Address Labels for Typewriters b...</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Bretford CR4500 Series Slim Rectangular Table</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Sean O'Donnell</th>\n",
       "      <td>2015-10-18</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>Eldon Fold 'N Roll Cart System</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 Ship Date       Ship Mode  \\\n",
       "Customer Name                                \n",
       "Brosina Hoffman        NaT             NaN   \n",
       "Claire Gute     2016-11-11    Second Class   \n",
       "Darrin Van Huff 2016-06-16    Second Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "Sean O'Donnell  2015-10-18  Standard Class   \n",
       "\n",
       "                                                      Product Name  Quantity  \n",
       "Customer Name                                                                 \n",
       "Brosina Hoffman  Eldon Expressions Wood and Plastic Desk Access...       7.0  \n",
       "Claire Gute                                                    NaN       NaN  \n",
       "Darrin Van Huff  Self-Adhesive Address Labels for Typewriters b...       2.0  \n",
       "Sean O'Donnell       Bretford CR4500 Series Slim Rectangular Table       5.0  \n",
       "Sean O'Donnell                      Eldon Fold 'N Roll Cart System       2.0  "
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Outer join on the 'Customer Name' index: the union of customers from df_1 and df_2,\n",
    "# with NaT/NaN wherever one side has no match. drop_duplicates() then removes rows\n",
    "# whose column values repeat (the index itself is not compared).\n",
    "(\n",
    "    df_1\n",
    "    .join(df_2, how='outer')\n",
    "    .drop_duplicates()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Miscellaneous useful methods"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 15: Randomized sampling - `sample` method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>CA-2016-112697</td>\n",
       "      <td>2016-12-18</td>\n",
       "      <td>2016-12-20</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>AH-10195</td>\n",
       "      <td>Alan Haines</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Tamarac</td>\n",
       "      <td>Florida</td>\n",
       "      <td>33319</td>\n",
       "      <td>South</td>\n",
       "      <td>OFF-SU-10000646</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Supplies</td>\n",
       "      <td>Premier Automatic Letter Opener</td>\n",
       "      <td>961.480</td>\n",
       "      <td>5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>-204.3145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7781</th>\n",
       "      <td>CA-2015-132136</td>\n",
       "      <td>2015-03-08</td>\n",
       "      <td>2015-03-12</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>FO-14305</td>\n",
       "      <td>Frank Olsen</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Chicago</td>\n",
       "      <td>Illinois</td>\n",
       "      <td>60623</td>\n",
       "      <td>Central</td>\n",
       "      <td>OFF-BI-10002706</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>Avery Premier Heavy-Duty Binder with Round Loc...</td>\n",
       "      <td>8.568</td>\n",
       "      <td>3</td>\n",
       "      <td>0.8</td>\n",
       "      <td>-14.5656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4638</th>\n",
       "      <td>CA-2017-168228</td>\n",
       "      <td>2017-04-27</td>\n",
       "      <td>2017-04-29</td>\n",
       "      <td>First Class</td>\n",
       "      <td>AP-10915</td>\n",
       "      <td>Arthur Prichep</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>90045</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AR-10001725</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Art</td>\n",
       "      <td>Boston Home &amp; Office Model 2000 Electric Penci...</td>\n",
       "      <td>47.300</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.2980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7672</th>\n",
       "      <td>CA-2015-130974</td>\n",
       "      <td>2015-11-27</td>\n",
       "      <td>2015-11-29</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>MA-17560</td>\n",
       "      <td>Matt Abelman</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Everett</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>2149</td>\n",
       "      <td>East</td>\n",
       "      <td>FUR-FU-10002506</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Furnishings</td>\n",
       "      <td>Tensor \"Hersey Kiss\" Styled Floor Lamp</td>\n",
       "      <td>38.970</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.6764</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1348</th>\n",
       "      <td>CA-2014-118339</td>\n",
       "      <td>2014-03-17</td>\n",
       "      <td>2014-03-24</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>BN-11515</td>\n",
       "      <td>Bradley Nguyen</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Lakeville</td>\n",
       "      <td>Minnesota</td>\n",
       "      <td>55044</td>\n",
       "      <td>Central</td>\n",
       "      <td>OFF-BI-10001758</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>Wilson Jones 14 Line Acrylic Coated Pressboard...</td>\n",
       "      <td>53.400</td>\n",
       "      <td>10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>25.0980</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "288   CA-2016-112697 2016-12-18 2016-12-20    Second Class    AH-10195   \n",
       "7781  CA-2015-132136 2015-03-08 2015-03-12  Standard Class    FO-14305   \n",
       "4638  CA-2017-168228 2017-04-27 2017-04-29     First Class    AP-10915   \n",
       "7672  CA-2015-130974 2015-11-27 2015-11-29    Second Class    MA-17560   \n",
       "1348  CA-2014-118339 2014-03-17 2014-03-24  Standard Class    BN-11515   \n",
       "\n",
       "       Customer Name      Segment         City          State  Postal Code  \\\n",
       "288      Alan Haines    Corporate      Tamarac        Florida        33319   \n",
       "7781     Frank Olsen     Consumer      Chicago       Illinois        60623   \n",
       "4638  Arthur Prichep     Consumer  Los Angeles     California        90045   \n",
       "7672    Matt Abelman  Home Office      Everett  Massachusetts         2149   \n",
       "1348  Bradley Nguyen     Consumer    Lakeville      Minnesota        55044   \n",
       "\n",
       "       Region       Product ID         Category Sub-Category  \\\n",
       "288     South  OFF-SU-10000646  Office Supplies     Supplies   \n",
       "7781  Central  OFF-BI-10002706  Office Supplies      Binders   \n",
       "4638     West  OFF-AR-10001725  Office Supplies          Art   \n",
       "7672     East  FUR-FU-10002506        Furniture  Furnishings   \n",
       "1348  Central  OFF-BI-10001758  Office Supplies      Binders   \n",
       "\n",
       "                                           Product Name    Sales  Quantity  \\\n",
       "288                     Premier Automatic Letter Opener  961.480         5   \n",
       "7781  Avery Premier Heavy-Duty Binder with Round Loc...    8.568         3   \n",
       "4638  Boston Home & Office Model 2000 Electric Penci...   47.300         2   \n",
       "7672             Tensor \"Hersey Kiss\" Styled Floor Lamp   38.970         3   \n",
       "1348  Wilson Jones 14 Line Acrylic Coated Pressboard...   53.400        10   \n",
       "\n",
       "      Discount    Profit  \n",
       "288        0.2 -204.3145  \n",
       "7781       0.8  -14.5656  \n",
       "4638       0.0   12.2980  \n",
       "7672       0.0    4.6764  \n",
       "1348       0.0   25.0980  "
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw 5 rows at random. A fixed random_state makes the draw repeatable, so the\n",
    "# same rows appear on every Restart & Run All; change or drop it to see a different sample.\n",
    "df.sample(n=5, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9107</th>\n",
       "      <td>CA-2015-132941</td>\n",
       "      <td>2015-05-25</td>\n",
       "      <td>2015-05-28</td>\n",
       "      <td>First Class</td>\n",
       "      <td>MM-18280</td>\n",
       "      <td>Muhammed MacIntyre</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Haltom City</td>\n",
       "      <td>Texas</td>\n",
       "      <td>76117</td>\n",
       "      <td>Central</td>\n",
       "      <td>OFF-SU-10002557</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Supplies</td>\n",
       "      <td>Fiskars Spring-Action Scissors</td>\n",
       "      <td>22.368</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>1.6776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9679</th>\n",
       "      <td>CA-2015-130113</td>\n",
       "      <td>2015-12-27</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>AH-10030</td>\n",
       "      <td>Aaron Hawkins</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>San Francisco</td>\n",
       "      <td>California</td>\n",
       "      <td>94122</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-ST-10000046</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Fellowes Super Stor/Drawer Files</td>\n",
       "      <td>323.100</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>61.3890</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6413</th>\n",
       "      <td>CA-2017-151211</td>\n",
       "      <td>2017-08-17</td>\n",
       "      <td>2017-08-23</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>AH-10120</td>\n",
       "      <td>Adrian Hane</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Louisville</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>40214</td>\n",
       "      <td>South</td>\n",
       "      <td>TEC-AC-10004510</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>Logitech Desktop MK120 Mouse and keyboard Combo</td>\n",
       "      <td>98.160</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.8160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3342</th>\n",
       "      <td>CA-2016-145982</td>\n",
       "      <td>2016-08-27</td>\n",
       "      <td>2016-09-01</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>TB-21055</td>\n",
       "      <td>Ted Butterfield</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Quincy</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>2169</td>\n",
       "      <td>East</td>\n",
       "      <td>FUR-TA-10001307</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Tables</td>\n",
       "      <td>SAFCO PlanMaster Heigh-Adjustable Drafting Tab...</td>\n",
       "      <td>244.615</td>\n",
       "      <td>1</td>\n",
       "      <td>0.3</td>\n",
       "      <td>20.9670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9987</th>\n",
       "      <td>CA-2017-163629</td>\n",
       "      <td>2017-11-17</td>\n",
       "      <td>2017-11-21</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>RA-19885</td>\n",
       "      <td>Ruben Ausman</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Athens</td>\n",
       "      <td>Georgia</td>\n",
       "      <td>30605</td>\n",
       "      <td>South</td>\n",
       "      <td>TEC-AC-10001539</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>Logitech G430 Surround Sound Gaming Headset wi...</td>\n",
       "      <td>79.990</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>28.7964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>339</th>\n",
       "      <td>CA-2015-128167</td>\n",
       "      <td>2015-06-22</td>\n",
       "      <td>2015-06-26</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>KL-16645</td>\n",
       "      <td>Ken Lonsdale</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Layton</td>\n",
       "      <td>Utah</td>\n",
       "      <td>84041</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-FA-10000490</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Fasteners</td>\n",
       "      <td>OIC Binder Clips, Mini, 1/4\" Capacity, Black</td>\n",
       "      <td>4.960</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.3312</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4767</th>\n",
       "      <td>CA-2015-123155</td>\n",
       "      <td>2015-03-09</td>\n",
       "      <td>2015-03-12</td>\n",
       "      <td>First Class</td>\n",
       "      <td>NS-18640</td>\n",
       "      <td>Noel Staavos</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>San Antonio</td>\n",
       "      <td>Texas</td>\n",
       "      <td>78207</td>\n",
       "      <td>Central</td>\n",
       "      <td>TEC-PH-10001809</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Phones</td>\n",
       "      <td>Panasonic KX T7736-B Digital phone</td>\n",
       "      <td>359.880</td>\n",
       "      <td>3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>22.4925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7883</th>\n",
       "      <td>CA-2017-118017</td>\n",
       "      <td>2017-12-03</td>\n",
       "      <td>2017-12-06</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>LC-16870</td>\n",
       "      <td>Lena Cacioppo</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Thornton</td>\n",
       "      <td>Colorado</td>\n",
       "      <td>80229</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-PA-10002246</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Paper</td>\n",
       "      <td>Wirebound Four 2-3/4 x 5 Forms per Page, 400 S...</td>\n",
       "      <td>10.320</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>3.7410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6469</th>\n",
       "      <td>CA-2016-163804</td>\n",
       "      <td>2016-12-02</td>\n",
       "      <td>2016-12-08</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>DB-13270</td>\n",
       "      <td>Deborah Brumfield</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Providence</td>\n",
       "      <td>Rhode Island</td>\n",
       "      <td>2908</td>\n",
       "      <td>East</td>\n",
       "      <td>FUR-FU-10004864</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Furnishings</td>\n",
       "      <td>Eldon 500 Class Desk Accessories</td>\n",
       "      <td>72.420</td>\n",
       "      <td>6</td>\n",
       "      <td>0.0</td>\n",
       "      <td>23.8986</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3715</th>\n",
       "      <td>CA-2014-135608</td>\n",
       "      <td>2014-12-08</td>\n",
       "      <td>2014-12-10</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>JK-15625</td>\n",
       "      <td>Jim Karlsson</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Olympia</td>\n",
       "      <td>Washington</td>\n",
       "      <td>98502</td>\n",
       "      <td>West</td>\n",
       "      <td>FUR-CH-10002602</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Chairs</td>\n",
       "      <td>DMI Arturo Collection Mission-style Design Woo...</td>\n",
       "      <td>603.920</td>\n",
       "      <td>5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>45.2940</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "9107  CA-2015-132941 2015-05-25 2015-05-28     First Class    MM-18280   \n",
       "9679  CA-2015-130113 2015-12-27 2015-12-31  Standard Class    AH-10030   \n",
       "6413  CA-2017-151211 2017-08-17 2017-08-23  Standard Class    AH-10120   \n",
       "3342  CA-2016-145982 2016-08-27 2016-09-01    Second Class    TB-21055   \n",
       "9987  CA-2017-163629 2017-11-17 2017-11-21  Standard Class    RA-19885   \n",
       "339   CA-2015-128167 2015-06-22 2015-06-26    Second Class    KL-16645   \n",
       "4767  CA-2015-123155 2015-03-09 2015-03-12     First Class    NS-18640   \n",
       "7883  CA-2017-118017 2017-12-03 2017-12-06    Second Class    LC-16870   \n",
       "6469  CA-2016-163804 2016-12-02 2016-12-08  Standard Class    DB-13270   \n",
       "3715  CA-2014-135608 2014-12-08 2014-12-10    Second Class    JK-15625   \n",
       "\n",
       "           Customer Name      Segment           City          State  \\\n",
       "9107  Muhammed MacIntyre    Corporate    Haltom City          Texas   \n",
       "9679       Aaron Hawkins    Corporate  San Francisco     California   \n",
       "6413         Adrian Hane  Home Office     Louisville       Kentucky   \n",
       "3342     Ted Butterfield     Consumer         Quincy  Massachusetts   \n",
       "9987        Ruben Ausman    Corporate         Athens        Georgia   \n",
       "339         Ken Lonsdale     Consumer         Layton           Utah   \n",
       "4767        Noel Staavos    Corporate    San Antonio          Texas   \n",
       "7883       Lena Cacioppo     Consumer       Thornton       Colorado   \n",
       "6469   Deborah Brumfield  Home Office     Providence   Rhode Island   \n",
       "3715        Jim Karlsson     Consumer        Olympia     Washington   \n",
       "\n",
       "      Postal Code   Region       Product ID         Category Sub-Category  \\\n",
       "9107        76117  Central  OFF-SU-10002557  Office Supplies     Supplies   \n",
       "9679        94122     West  OFF-ST-10000046  Office Supplies      Storage   \n",
       "6413        40214    South  TEC-AC-10004510       Technology  Accessories   \n",
       "3342         2169     East  FUR-TA-10001307        Furniture       Tables   \n",
       "9987        30605    South  TEC-AC-10001539       Technology  Accessories   \n",
       "339         84041     West  OFF-FA-10000490  Office Supplies    Fasteners   \n",
       "4767        78207  Central  TEC-PH-10001809       Technology       Phones   \n",
       "7883        80229     West  OFF-PA-10002246  Office Supplies        Paper   \n",
       "6469         2908     East  FUR-FU-10004864        Furniture  Furnishings   \n",
       "3715        98502     West  FUR-CH-10002602        Furniture       Chairs   \n",
       "\n",
       "                                           Product Name    Sales  Quantity  \\\n",
       "9107                     Fiskars Spring-Action Scissors   22.368         2   \n",
       "9679                   Fellowes Super Stor/Drawer Files  323.100         2   \n",
       "6413    Logitech Desktop MK120 Mouse and keyboard Combo   98.160         6   \n",
       "3342  SAFCO PlanMaster Heigh-Adjustable Drafting Tab...  244.615         1   \n",
       "9987  Logitech G430 Surround Sound Gaming Headset wi...   79.990         1   \n",
       "339        OIC Binder Clips, Mini, 1/4\" Capacity, Black    4.960         4   \n",
       "4767                 Panasonic KX T7736-B Digital phone  359.880         3   \n",
       "7883  Wirebound Four 2-3/4 x 5 Forms per Page, 400 S...   10.320         2   \n",
       "6469                   Eldon 500 Class Desk Accessories   72.420         6   \n",
       "3715  DMI Arturo Collection Mission-style Design Woo...  603.920         5   \n",
       "\n",
       "      Discount   Profit  \n",
       "9107       0.2   1.6776  \n",
       "9679       0.0  61.3890  \n",
       "6413       0.0   9.8160  \n",
       "3342       0.3  20.9670  \n",
       "9987       0.0  28.7964  \n",
       "339        0.0   2.3312  \n",
       "4767       0.2  22.4925  \n",
       "7883       0.2   3.7410  \n",
       "6469       0.0  23.8986  \n",
       "3715       0.2  45.2940  "
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sample(frac=0.001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Order ID</th>\n",
       "      <th>Order Date</th>\n",
       "      <th>Ship Date</th>\n",
       "      <th>Ship Mode</th>\n",
       "      <th>Customer ID</th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>Segment</th>\n",
       "      <th>City</th>\n",
       "      <th>State</th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Region</th>\n",
       "      <th>Product ID</th>\n",
       "      <th>Category</th>\n",
       "      <th>Sub-Category</th>\n",
       "      <th>Product Name</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Discount</th>\n",
       "      <th>Profit</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5792</th>\n",
       "      <td>CA-2017-140186</td>\n",
       "      <td>2017-09-29</td>\n",
       "      <td>2017-10-02</td>\n",
       "      <td>First Class</td>\n",
       "      <td>PG-18820</td>\n",
       "      <td>Patrick Gardner</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Bakersfield</td>\n",
       "      <td>California</td>\n",
       "      <td>93309</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-AP-10002578</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Appliances</td>\n",
       "      <td>Fellowes Premier Superior Surge Suppressor, 10...</td>\n",
       "      <td>97.840</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>25.4384</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5933</th>\n",
       "      <td>US-2017-169551</td>\n",
       "      <td>2017-07-07</td>\n",
       "      <td>2017-07-09</td>\n",
       "      <td>First Class</td>\n",
       "      <td>RL-19615</td>\n",
       "      <td>Rob Lucas</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Philadelphia</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>19120</td>\n",
       "      <td>East</td>\n",
       "      <td>TEC-AC-10002018</td>\n",
       "      <td>Technology</td>\n",
       "      <td>Accessories</td>\n",
       "      <td>AmazonBasics 3-Button USB Wired Mouse</td>\n",
       "      <td>16.776</td>\n",
       "      <td>3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>4.8231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1953</th>\n",
       "      <td>CA-2017-157987</td>\n",
       "      <td>2017-09-02</td>\n",
       "      <td>2017-09-06</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>AC-10615</td>\n",
       "      <td>Ann Chong</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>New York City</td>\n",
       "      <td>New York</td>\n",
       "      <td>10009</td>\n",
       "      <td>East</td>\n",
       "      <td>FUR-CH-10003379</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Chairs</td>\n",
       "      <td>Global Commerce Series High-Back Swivel/Tilt C...</td>\n",
       "      <td>1282.410</td>\n",
       "      <td>5</td>\n",
       "      <td>0.1</td>\n",
       "      <td>213.7350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1112</th>\n",
       "      <td>US-2016-110156</td>\n",
       "      <td>2016-11-19</td>\n",
       "      <td>2016-11-24</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>EH-13945</td>\n",
       "      <td>Eric Hoffmann</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Houston</td>\n",
       "      <td>Texas</td>\n",
       "      <td>77041</td>\n",
       "      <td>Central</td>\n",
       "      <td>OFF-BI-10002609</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>Avery Hidden Tab Dividers for Binding Systems</td>\n",
       "      <td>1.192</td>\n",
       "      <td>2</td>\n",
       "      <td>0.8</td>\n",
       "      <td>-2.0264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5491</th>\n",
       "      <td>CA-2017-164098</td>\n",
       "      <td>2017-01-26</td>\n",
       "      <td>2017-01-27</td>\n",
       "      <td>First Class</td>\n",
       "      <td>CG-12520</td>\n",
       "      <td>Claire Gute</td>\n",
       "      <td>Consumer</td>\n",
       "      <td>Houston</td>\n",
       "      <td>Texas</td>\n",
       "      <td>77070</td>\n",
       "      <td>Central</td>\n",
       "      <td>OFF-ST-10000615</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>SimpliFile Personal File, Black Granite, 15w x...</td>\n",
       "      <td>18.160</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>1.8160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3400</th>\n",
       "      <td>CA-2014-102274</td>\n",
       "      <td>2014-11-21</td>\n",
       "      <td>2014-11-26</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>DH-13075</td>\n",
       "      <td>Dave Hallsten</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Richmond</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>40475</td>\n",
       "      <td>South</td>\n",
       "      <td>OFF-ST-10001511</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Space Solutions Commercial Steel Shelving</td>\n",
       "      <td>193.950</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.6975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9465</th>\n",
       "      <td>CA-2016-126858</td>\n",
       "      <td>2016-11-19</td>\n",
       "      <td>2016-11-23</td>\n",
       "      <td>Standard Class</td>\n",
       "      <td>JM-15265</td>\n",
       "      <td>Janet Molinari</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Seattle</td>\n",
       "      <td>Washington</td>\n",
       "      <td>98115</td>\n",
       "      <td>West</td>\n",
       "      <td>FUR-FU-10000448</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Furnishings</td>\n",
       "      <td>Tenex Chairmats For Use With Carpeted Floors</td>\n",
       "      <td>31.960</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.5980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2185</th>\n",
       "      <td>CA-2017-124576</td>\n",
       "      <td>2017-08-01</td>\n",
       "      <td>2017-08-04</td>\n",
       "      <td>Second Class</td>\n",
       "      <td>HK-14890</td>\n",
       "      <td>Heather Kirkland</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Salinas</td>\n",
       "      <td>California</td>\n",
       "      <td>93905</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-BI-10002735</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Binders</td>\n",
       "      <td>GBC Prestige Therm-A-Bind Covers</td>\n",
       "      <td>54.896</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2</td>\n",
       "      <td>18.5274</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7065</th>\n",
       "      <td>CA-2014-106726</td>\n",
       "      <td>2014-12-06</td>\n",
       "      <td>2014-12-08</td>\n",
       "      <td>First Class</td>\n",
       "      <td>RS-19765</td>\n",
       "      <td>Roland Schwarz</td>\n",
       "      <td>Corporate</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>California</td>\n",
       "      <td>90008</td>\n",
       "      <td>West</td>\n",
       "      <td>OFF-ST-10001496</td>\n",
       "      <td>Office Supplies</td>\n",
       "      <td>Storage</td>\n",
       "      <td>Standard Rollaway File with Lock</td>\n",
       "      <td>1261.330</td>\n",
       "      <td>7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>327.9458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1831</th>\n",
       "      <td>CA-2017-145884</td>\n",
       "      <td>2017-10-21</td>\n",
       "      <td>2017-10-21</td>\n",
       "      <td>Same Day</td>\n",
       "      <td>SL-20155</td>\n",
       "      <td>Sara Luxemburg</td>\n",
       "      <td>Home Office</td>\n",
       "      <td>Muskogee</td>\n",
       "      <td>Oklahoma</td>\n",
       "      <td>74403</td>\n",
       "      <td>Central</td>\n",
       "      <td>FUR-TA-10002356</td>\n",
       "      <td>Furniture</td>\n",
       "      <td>Tables</td>\n",
       "      <td>Bevis Boat-Shaped Conference Table</td>\n",
       "      <td>262.110</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>62.9064</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Order ID Order Date  Ship Date       Ship Mode Customer ID  \\\n",
       "5792  CA-2017-140186 2017-09-29 2017-10-02     First Class    PG-18820   \n",
       "5933  US-2017-169551 2017-07-07 2017-07-09     First Class    RL-19615   \n",
       "1953  CA-2017-157987 2017-09-02 2017-09-06  Standard Class    AC-10615   \n",
       "1112  US-2016-110156 2016-11-19 2016-11-24  Standard Class    EH-13945   \n",
       "5491  CA-2017-164098 2017-01-26 2017-01-27     First Class    CG-12520   \n",
       "3400  CA-2014-102274 2014-11-21 2014-11-26  Standard Class    DH-13075   \n",
       "9465  CA-2016-126858 2016-11-19 2016-11-23  Standard Class    JM-15265   \n",
       "2185  CA-2017-124576 2017-08-01 2017-08-04    Second Class    HK-14890   \n",
       "7065  CA-2014-106726 2014-12-06 2014-12-08     First Class    RS-19765   \n",
       "1831  CA-2017-145884 2017-10-21 2017-10-21        Same Day    SL-20155   \n",
       "\n",
       "         Customer Name      Segment           City         State  Postal Code  \\\n",
       "5792   Patrick Gardner     Consumer    Bakersfield    California        93309   \n",
       "5933         Rob Lucas     Consumer   Philadelphia  Pennsylvania        19120   \n",
       "1953         Ann Chong    Corporate  New York City      New York        10009   \n",
       "1112     Eric Hoffmann     Consumer        Houston         Texas        77041   \n",
       "5491       Claire Gute     Consumer        Houston         Texas        77070   \n",
       "3400     Dave Hallsten    Corporate       Richmond      Kentucky        40475   \n",
       "9465    Janet Molinari    Corporate        Seattle    Washington        98115   \n",
       "2185  Heather Kirkland    Corporate        Salinas    California        93905   \n",
       "7065    Roland Schwarz    Corporate    Los Angeles    California        90008   \n",
       "1831    Sara Luxemburg  Home Office       Muskogee      Oklahoma        74403   \n",
       "\n",
       "       Region       Product ID         Category Sub-Category  \\\n",
       "5792     West  OFF-AP-10002578  Office Supplies   Appliances   \n",
       "5933     East  TEC-AC-10002018       Technology  Accessories   \n",
       "1953     East  FUR-CH-10003379        Furniture       Chairs   \n",
       "1112  Central  OFF-BI-10002609  Office Supplies      Binders   \n",
       "5491  Central  OFF-ST-10000615  Office Supplies      Storage   \n",
       "3400    South  OFF-ST-10001511  Office Supplies      Storage   \n",
       "9465     West  FUR-FU-10000448        Furniture  Furnishings   \n",
       "2185     West  OFF-BI-10002735  Office Supplies      Binders   \n",
       "7065     West  OFF-ST-10001496  Office Supplies      Storage   \n",
       "1831  Central  FUR-TA-10002356        Furniture       Tables   \n",
       "\n",
       "                                           Product Name     Sales  Quantity  \\\n",
       "5792  Fellowes Premier Superior Surge Suppressor, 10...    97.840         2   \n",
       "5933              AmazonBasics 3-Button USB Wired Mouse    16.776         3   \n",
       "1953  Global Commerce Series High-Back Swivel/Tilt C...  1282.410         5   \n",
       "1112      Avery Hidden Tab Dividers for Binding Systems     1.192         2   \n",
       "5491  SimpliFile Personal File, Black Granite, 15w x...    18.160         2   \n",
       "3400          Space Solutions Commercial Steel Shelving   193.950         3   \n",
       "9465       Tenex Chairmats For Use With Carpeted Floors    31.960         2   \n",
       "2185                   GBC Prestige Therm-A-Bind Covers    54.896         2   \n",
       "7065                   Standard Rollaway File with Lock  1261.330         7   \n",
       "1831                 Bevis Boat-Shaped Conference Table   262.110         1   \n",
       "\n",
       "      Discount    Profit  \n",
       "5792       0.0   25.4384  \n",
       "5933       0.2    4.8231  \n",
       "1953       0.1  213.7350  \n",
       "1112       0.8   -2.0264  \n",
       "5491       0.2    1.8160  \n",
       "3400       0.0    9.6975  \n",
       "9465       0.0    1.5980  \n",
       "2185       0.2   18.5274  \n",
       "7065       0.0  327.9458  \n",
       "1831       0.0   62.9064  "
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sample(frac=0.001,replace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 16: Pandas `value_counts` method to return counts of unique records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "William Brown          37\n",
       "Paul Prost             34\n",
       "John Lee               34\n",
       "Matt Abelman           34\n",
       "Jonathan Doherty       32\n",
       "Edward Hooks           32\n",
       "Chloris Kastensmidt    32\n",
       "Seth Vernon            32\n",
       "Emily Phan             31\n",
       "Arthur Prichep         31\n",
       "Name: Customer Name, dtype: int64"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['Customer Name'].value_counts()[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 17: Pivot table functionality - `pivot_table`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_sample=df.sample(n=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>Profit</th>\n",
       "      <th>Quantity</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Region</th>\n",
       "      <th>State</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">Central</th>\n",
       "      <th>Illinois</th>\n",
       "      <td>-13.785818</td>\n",
       "      <td>4.727273</td>\n",
       "      <td>217.450727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Michigan</th>\n",
       "      <td>138.692167</td>\n",
       "      <td>3.333333</td>\n",
       "      <td>532.633333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Minnesota</th>\n",
       "      <td>3.882200</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>8.260000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oklahoma</th>\n",
       "      <td>24.913200</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>63.880000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>-42.241300</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>206.759100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Wisconsin</th>\n",
       "      <td>585.552000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>1951.840000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"9\" valign=\"top\">East</th>\n",
       "      <th>Connecticut</th>\n",
       "      <td>2.822400</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>5.760000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Delaware</th>\n",
       "      <td>26.565000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>57.750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Maryland</th>\n",
       "      <td>355.446600</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>826.620000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Massachusetts</th>\n",
       "      <td>315.740400</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>1088.760000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New Jersey</th>\n",
       "      <td>-19.918400</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>174.286000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>2.481500</td>\n",
       "      <td>4.545455</td>\n",
       "      <td>491.668545</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>3.876050</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>22.880000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pennsylvania</th>\n",
       "      <td>-17.853600</td>\n",
       "      <td>3.857143</td>\n",
       "      <td>243.960714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rhode Island</th>\n",
       "      <td>6.220800</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>12.960000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">South</th>\n",
       "      <th>Florida</th>\n",
       "      <td>-40.166400</td>\n",
       "      <td>4.750000</td>\n",
       "      <td>991.250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Georgia</th>\n",
       "      <td>3.712800</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>14.280000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Kentucky</th>\n",
       "      <td>76.112500</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>304.450000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>North Carolina</th>\n",
       "      <td>9.480600</td>\n",
       "      <td>1.500000</td>\n",
       "      <td>118.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tennessee</th>\n",
       "      <td>-4.687800</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>9.306000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Virginia</th>\n",
       "      <td>27.810733</td>\n",
       "      <td>4.333333</td>\n",
       "      <td>102.443333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">West</th>\n",
       "      <th>Arizona</th>\n",
       "      <td>11.757600</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>40.260000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>California</th>\n",
       "      <td>43.626859</td>\n",
       "      <td>3.954545</td>\n",
       "      <td>187.089727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Idaho</th>\n",
       "      <td>160.176600</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>696.420000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Montana</th>\n",
       "      <td>2.209800</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>6.096000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>3.597100</td>\n",
       "      <td>4.666667</td>\n",
       "      <td>48.186667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Washington</th>\n",
       "      <td>1385.525000</td>\n",
       "      <td>3.600000</td>\n",
       "      <td>3035.828000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             Profit  Quantity        Sales\n",
       "Region  State                                             \n",
       "Central Illinois         -13.785818  4.727273   217.450727\n",
       "        Michigan         138.692167  3.333333   532.633333\n",
       "        Minnesota          3.882200  2.000000     8.260000\n",
       "        Oklahoma          24.913200  4.000000    63.880000\n",
       "        Texas            -42.241300  5.000000   206.759100\n",
       "        Wisconsin        585.552000  8.000000  1951.840000\n",
       "East    Connecticut        2.822400  2.000000     5.760000\n",
       "        Delaware          26.565000  5.000000    57.750000\n",
       "        Maryland         355.446600  3.000000   826.620000\n",
       "        Massachusetts    315.740400  6.000000  1088.760000\n",
       "        New Jersey       -19.918400  2.000000   174.286000\n",
       "        New York           2.481500  4.545455   491.668545\n",
       "        Ohio               3.876050  2.000000    22.880000\n",
       "        Pennsylvania     -17.853600  3.857143   243.960714\n",
       "        Rhode Island       6.220800  2.000000    12.960000\n",
       "South   Florida          -40.166400  4.750000   991.250000\n",
       "        Georgia            3.712800  4.000000    14.280000\n",
       "        Kentucky          76.112500  5.000000   304.450000\n",
       "        North Carolina     9.480600  1.500000   118.800000\n",
       "        Tennessee         -4.687800  2.000000     9.306000\n",
       "        Virginia          27.810733  4.333333   102.443333\n",
       "West    Arizona           11.757600  3.500000    40.260000\n",
       "        California        43.626859  3.954545   187.089727\n",
       "        Idaho            160.176600  2.000000   696.420000\n",
       "        Montana            2.209800  2.000000     6.096000\n",
       "        Oregon             3.597100  4.666667    48.186667\n",
       "        Washington      1385.525000  3.600000  3035.828000"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample.pivot_table(values=['Sales','Quantity','Profit'],index=['Region','State'],aggfunc='mean')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 18: Sorting by a particular column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5144</th>\n",
       "      <td>John Lee</td>\n",
       "      <td>California</td>\n",
       "      <td>86.260</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9908</th>\n",
       "      <td>Ken Dana</td>\n",
       "      <td>California</td>\n",
       "      <td>207.350</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4989</th>\n",
       "      <td>Arthur Wiediger</td>\n",
       "      <td>California</td>\n",
       "      <td>6.672</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3384</th>\n",
       "      <td>Maribeth Schnelling</td>\n",
       "      <td>New York</td>\n",
       "      <td>1799.750</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6118</th>\n",
       "      <td>Justin Ritter</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>191.976</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2073</th>\n",
       "      <td>Deborah Brumfield</td>\n",
       "      <td>California</td>\n",
       "      <td>59.970</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4115</th>\n",
       "      <td>Amy Cox</td>\n",
       "      <td>Washington</td>\n",
       "      <td>1219.960</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6572</th>\n",
       "      <td>Roger Demir</td>\n",
       "      <td>Washington</td>\n",
       "      <td>11.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6750</th>\n",
       "      <td>Becky Pak</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>121.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3026</th>\n",
       "      <td>Meg O'Connel</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>254.352</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5502</th>\n",
       "      <td>Thea Hudgings</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.312</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6667</th>\n",
       "      <td>Cynthia Arntzen</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>6.160</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9884</th>\n",
       "      <td>Katrina Edelman</td>\n",
       "      <td>California</td>\n",
       "      <td>62.310</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3415</th>\n",
       "      <td>Sonia Sunley</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>1.810</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8995</th>\n",
       "      <td>Christine Sundaresam</td>\n",
       "      <td>Florida</td>\n",
       "      <td>152.240</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Customer Name         State     Sales  Quantity\n",
       "5144              John Lee    California    86.260         2\n",
       "9908              Ken Dana    California   207.350         5\n",
       "4989       Arthur Wiediger    California     6.672         3\n",
       "3384   Maribeth Schnelling      New York  1799.750         5\n",
       "6118         Justin Ritter        Oregon   191.976         3\n",
       "2073     Deborah Brumfield    California    59.970         3\n",
       "4115               Amy Cox    Washington  1219.960         5\n",
       "6572           Roger Demir    Washington    11.960         2\n",
       "6750             Becky Pak    New Jersey   121.960         2\n",
       "3026          Meg O'Connel  Pennsylvania   254.352         3\n",
       "5502         Thea Hudgings  Pennsylvania     3.312         1\n",
       "6667       Cynthia Arntzen    New Jersey     6.160         2\n",
       "9884       Katrina Edelman    California    62.310         3\n",
       "3415          Sonia Sunley     Wisconsin     1.810         1\n",
       "8995  Christine Sundaresam       Florida   152.240         5"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample=df[['Customer Name','State','Sales','Quantity']].sample(n=15)\n",
    "df_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3415</th>\n",
       "      <td>Sonia Sunley</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>1.810</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5502</th>\n",
       "      <td>Thea Hudgings</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.312</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6667</th>\n",
       "      <td>Cynthia Arntzen</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>6.160</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4989</th>\n",
       "      <td>Arthur Wiediger</td>\n",
       "      <td>California</td>\n",
       "      <td>6.672</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6572</th>\n",
       "      <td>Roger Demir</td>\n",
       "      <td>Washington</td>\n",
       "      <td>11.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2073</th>\n",
       "      <td>Deborah Brumfield</td>\n",
       "      <td>California</td>\n",
       "      <td>59.970</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9884</th>\n",
       "      <td>Katrina Edelman</td>\n",
       "      <td>California</td>\n",
       "      <td>62.310</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5144</th>\n",
       "      <td>John Lee</td>\n",
       "      <td>California</td>\n",
       "      <td>86.260</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6750</th>\n",
       "      <td>Becky Pak</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>121.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8995</th>\n",
       "      <td>Christine Sundaresam</td>\n",
       "      <td>Florida</td>\n",
       "      <td>152.240</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6118</th>\n",
       "      <td>Justin Ritter</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>191.976</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9908</th>\n",
       "      <td>Ken Dana</td>\n",
       "      <td>California</td>\n",
       "      <td>207.350</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3026</th>\n",
       "      <td>Meg O'Connel</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>254.352</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4115</th>\n",
       "      <td>Amy Cox</td>\n",
       "      <td>Washington</td>\n",
       "      <td>1219.960</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3384</th>\n",
       "      <td>Maribeth Schnelling</td>\n",
       "      <td>New York</td>\n",
       "      <td>1799.750</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Customer Name         State     Sales  Quantity\n",
       "3415          Sonia Sunley     Wisconsin     1.810         1\n",
       "5502         Thea Hudgings  Pennsylvania     3.312         1\n",
       "6667       Cynthia Arntzen    New Jersey     6.160         2\n",
       "4989       Arthur Wiediger    California     6.672         3\n",
       "6572           Roger Demir    Washington    11.960         2\n",
       "2073     Deborah Brumfield    California    59.970         3\n",
       "9884       Katrina Edelman    California    62.310         3\n",
       "5144              John Lee    California    86.260         2\n",
       "6750             Becky Pak    New Jersey   121.960         2\n",
       "8995  Christine Sundaresam       Florida   152.240         5\n",
       "6118         Justin Ritter        Oregon   191.976         3\n",
       "9908              Ken Dana    California   207.350         5\n",
       "3026          Meg O'Connel  Pennsylvania   254.352         3\n",
       "4115               Amy Cox    Washington  1219.960         5\n",
       "3384   Maribeth Schnelling      New York  1799.750         5"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample.sort_values(by='Sales')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4989</th>\n",
       "      <td>Arthur Wiediger</td>\n",
       "      <td>California</td>\n",
       "      <td>6.672</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2073</th>\n",
       "      <td>Deborah Brumfield</td>\n",
       "      <td>California</td>\n",
       "      <td>59.970</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9884</th>\n",
       "      <td>Katrina Edelman</td>\n",
       "      <td>California</td>\n",
       "      <td>62.310</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5144</th>\n",
       "      <td>John Lee</td>\n",
       "      <td>California</td>\n",
       "      <td>86.260</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9908</th>\n",
       "      <td>Ken Dana</td>\n",
       "      <td>California</td>\n",
       "      <td>207.350</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8995</th>\n",
       "      <td>Christine Sundaresam</td>\n",
       "      <td>Florida</td>\n",
       "      <td>152.240</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6667</th>\n",
       "      <td>Cynthia Arntzen</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>6.160</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6750</th>\n",
       "      <td>Becky Pak</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>121.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3384</th>\n",
       "      <td>Maribeth Schnelling</td>\n",
       "      <td>New York</td>\n",
       "      <td>1799.750</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6118</th>\n",
       "      <td>Justin Ritter</td>\n",
       "      <td>Oregon</td>\n",
       "      <td>191.976</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5502</th>\n",
       "      <td>Thea Hudgings</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.312</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3026</th>\n",
       "      <td>Meg O'Connel</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>254.352</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6572</th>\n",
       "      <td>Roger Demir</td>\n",
       "      <td>Washington</td>\n",
       "      <td>11.960</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4115</th>\n",
       "      <td>Amy Cox</td>\n",
       "      <td>Washington</td>\n",
       "      <td>1219.960</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3415</th>\n",
       "      <td>Sonia Sunley</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>1.810</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Customer Name         State     Sales  Quantity\n",
       "4989       Arthur Wiediger    California     6.672         3\n",
       "2073     Deborah Brumfield    California    59.970         3\n",
       "9884       Katrina Edelman    California    62.310         3\n",
       "5144              John Lee    California    86.260         2\n",
       "9908              Ken Dana    California   207.350         5\n",
       "8995  Christine Sundaresam       Florida   152.240         5\n",
       "6667       Cynthia Arntzen    New Jersey     6.160         2\n",
       "6750             Becky Pak    New Jersey   121.960         2\n",
       "3384   Maribeth Schnelling      New York  1799.750         5\n",
       "6118         Justin Ritter        Oregon   191.976         3\n",
       "5502         Thea Hudgings  Pennsylvania     3.312         1\n",
       "3026          Meg O'Connel  Pennsylvania   254.352         3\n",
       "6572           Roger Demir    Washington    11.960         2\n",
       "4115               Amy Cox    Washington  1219.960         5\n",
       "3415          Sonia Sunley     Wisconsin     1.810         1"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample.sort_values(by=['State','Sales'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Exercise 19: Flexibility for user-defined function with `apply` method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "def categorize_sales(price):\n",
    "    if price < 50:\n",
    "        return \"Low\"\n",
    "    elif price < 200:\n",
    "        return \"Medium\"\n",
    "    else:\n",
    "        return \"High\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7892</th>\n",
       "      <td>Trudy Schmidt</td>\n",
       "      <td>Arkansas</td>\n",
       "      <td>106.320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7217</th>\n",
       "      <td>Lena Cacioppo</td>\n",
       "      <td>California</td>\n",
       "      <td>62.496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9006</th>\n",
       "      <td>Nona Balk</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>37.760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5762</th>\n",
       "      <td>Justin MacKendrick</td>\n",
       "      <td>California</td>\n",
       "      <td>54.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7470</th>\n",
       "      <td>Ann Chong</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4434</th>\n",
       "      <td>Arthur Prichep</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>842.940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4712</th>\n",
       "      <td>Ed Jacobs</td>\n",
       "      <td>Texas</td>\n",
       "      <td>36.288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8003</th>\n",
       "      <td>Anna Andreadi</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>271.900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3237</th>\n",
       "      <td>Cindy Chapman</td>\n",
       "      <td>New York</td>\n",
       "      <td>45.576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5159</th>\n",
       "      <td>Andrew Gjertsen</td>\n",
       "      <td>Arizona</td>\n",
       "      <td>105.584</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           Customer Name          State    Sales\n",
       "7892       Trudy Schmidt       Arkansas  106.320\n",
       "7217       Lena Cacioppo     California   62.496\n",
       "9006           Nona Balk      Wisconsin   37.760\n",
       "5762  Justin MacKendrick     California   54.920\n",
       "7470           Ann Chong   Pennsylvania    3.576\n",
       "4434      Arthur Prichep       Kentucky  842.940\n",
       "4712           Ed Jacobs          Texas   36.288\n",
       "8003       Anna Andreadi  Massachusetts  271.900\n",
       "3237       Cindy Chapman       New York   45.576\n",
       "5159     Andrew Gjertsen        Arizona  105.584"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample=df[['Customer Name','State','Sales']].sample(n=100)\n",
    "df_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Sales Price Category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7892</th>\n",
       "      <td>Trudy Schmidt</td>\n",
       "      <td>Arkansas</td>\n",
       "      <td>106.320</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7217</th>\n",
       "      <td>Lena Cacioppo</td>\n",
       "      <td>California</td>\n",
       "      <td>62.496</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9006</th>\n",
       "      <td>Nona Balk</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>37.760</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5762</th>\n",
       "      <td>Justin MacKendrick</td>\n",
       "      <td>California</td>\n",
       "      <td>54.920</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7470</th>\n",
       "      <td>Ann Chong</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.576</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4434</th>\n",
       "      <td>Arthur Prichep</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>842.940</td>\n",
       "      <td>High</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4712</th>\n",
       "      <td>Ed Jacobs</td>\n",
       "      <td>Texas</td>\n",
       "      <td>36.288</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8003</th>\n",
       "      <td>Anna Andreadi</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>271.900</td>\n",
       "      <td>High</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3237</th>\n",
       "      <td>Cindy Chapman</td>\n",
       "      <td>New York</td>\n",
       "      <td>45.576</td>\n",
       "      <td>Low</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5159</th>\n",
       "      <td>Andrew Gjertsen</td>\n",
       "      <td>Arizona</td>\n",
       "      <td>105.584</td>\n",
       "      <td>Medium</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           Customer Name          State    Sales Sales Price Category\n",
       "7892       Trudy Schmidt       Arkansas  106.320               Medium\n",
       "7217       Lena Cacioppo     California   62.496               Medium\n",
       "9006           Nona Balk      Wisconsin   37.760                  Low\n",
       "5762  Justin MacKendrick     California   54.920               Medium\n",
       "7470           Ann Chong   Pennsylvania    3.576                  Low\n",
       "4434      Arthur Prichep       Kentucky  842.940                 High\n",
       "4712           Ed Jacobs          Texas   36.288                  Low\n",
       "8003       Anna Andreadi  Massachusetts  271.900                 High\n",
       "3237       Cindy Chapman       New York   45.576                  Low\n",
       "5159     Andrew Gjertsen        Arizona  105.584               Medium"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample['Sales Price Category']=df_sample['Sales'].apply(categorize_sales)\n",
    "df_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Sales Price Category</th>\n",
       "      <th>Customer Name Length</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7892</th>\n",
       "      <td>Trudy Schmidt</td>\n",
       "      <td>Arkansas</td>\n",
       "      <td>106.320</td>\n",
       "      <td>Medium</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7217</th>\n",
       "      <td>Lena Cacioppo</td>\n",
       "      <td>California</td>\n",
       "      <td>62.496</td>\n",
       "      <td>Medium</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9006</th>\n",
       "      <td>Nona Balk</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>37.760</td>\n",
       "      <td>Low</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5762</th>\n",
       "      <td>Justin MacKendrick</td>\n",
       "      <td>California</td>\n",
       "      <td>54.920</td>\n",
       "      <td>Medium</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7470</th>\n",
       "      <td>Ann Chong</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.576</td>\n",
       "      <td>Low</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4434</th>\n",
       "      <td>Arthur Prichep</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>842.940</td>\n",
       "      <td>High</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4712</th>\n",
       "      <td>Ed Jacobs</td>\n",
       "      <td>Texas</td>\n",
       "      <td>36.288</td>\n",
       "      <td>Low</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8003</th>\n",
       "      <td>Anna Andreadi</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>271.900</td>\n",
       "      <td>High</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3237</th>\n",
       "      <td>Cindy Chapman</td>\n",
       "      <td>New York</td>\n",
       "      <td>45.576</td>\n",
       "      <td>Low</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5159</th>\n",
       "      <td>Andrew Gjertsen</td>\n",
       "      <td>Arizona</td>\n",
       "      <td>105.584</td>\n",
       "      <td>Medium</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           Customer Name          State    Sales Sales Price Category  \\\n",
       "7892       Trudy Schmidt       Arkansas  106.320               Medium   \n",
       "7217       Lena Cacioppo     California   62.496               Medium   \n",
       "9006           Nona Balk      Wisconsin   37.760                  Low   \n",
       "5762  Justin MacKendrick     California   54.920               Medium   \n",
       "7470           Ann Chong   Pennsylvania    3.576                  Low   \n",
       "4434      Arthur Prichep       Kentucky  842.940                 High   \n",
       "4712           Ed Jacobs          Texas   36.288                  Low   \n",
       "8003       Anna Andreadi  Massachusetts  271.900                 High   \n",
       "3237       Cindy Chapman       New York   45.576                  Low   \n",
       "5159     Andrew Gjertsen        Arizona  105.584               Medium   \n",
       "\n",
       "      Customer Name Length  \n",
       "7892                    13  \n",
       "7217                    13  \n",
       "9006                     9  \n",
       "5762                    18  \n",
       "7470                     9  \n",
       "4434                    14  \n",
       "4712                     9  \n",
       "8003                    13  \n",
       "3237                    13  \n",
       "5159                    15  "
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_sample['Customer Name Length']=df_sample['Customer Name'].apply(len)\n",
    "df_sample.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Customer Name</th>\n",
       "      <th>State</th>\n",
       "      <th>Sales</th>\n",
       "      <th>Sales Price Category</th>\n",
       "      <th>Customer Name Length</th>\n",
       "      <th>Discounted Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7892</th>\n",
       "      <td>Trudy Schmidt</td>\n",
       "      <td>Arkansas</td>\n",
       "      <td>106.320</td>\n",
       "      <td>Medium</td>\n",
       "      <td>13</td>\n",
       "      <td>106.320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7217</th>\n",
       "      <td>Lena Cacioppo</td>\n",
       "      <td>California</td>\n",
       "      <td>62.496</td>\n",
       "      <td>Medium</td>\n",
       "      <td>13</td>\n",
       "      <td>62.496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9006</th>\n",
       "      <td>Nona Balk</td>\n",
       "      <td>Wisconsin</td>\n",
       "      <td>37.760</td>\n",
       "      <td>Low</td>\n",
       "      <td>9</td>\n",
       "      <td>37.760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5762</th>\n",
       "      <td>Justin MacKendrick</td>\n",
       "      <td>California</td>\n",
       "      <td>54.920</td>\n",
       "      <td>Medium</td>\n",
       "      <td>18</td>\n",
       "      <td>54.920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7470</th>\n",
       "      <td>Ann Chong</td>\n",
       "      <td>Pennsylvania</td>\n",
       "      <td>3.576</td>\n",
       "      <td>Low</td>\n",
       "      <td>9</td>\n",
       "      <td>3.576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4434</th>\n",
       "      <td>Arthur Prichep</td>\n",
       "      <td>Kentucky</td>\n",
       "      <td>842.940</td>\n",
       "      <td>High</td>\n",
       "      <td>14</td>\n",
       "      <td>716.499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4712</th>\n",
       "      <td>Ed Jacobs</td>\n",
       "      <td>Texas</td>\n",
       "      <td>36.288</td>\n",
       "      <td>Low</td>\n",
       "      <td>9</td>\n",
       "      <td>36.288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8003</th>\n",
       "      <td>Anna Andreadi</td>\n",
       "      <td>Massachusetts</td>\n",
       "      <td>271.900</td>\n",
       "      <td>High</td>\n",
       "      <td>13</td>\n",
       "      <td>231.115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3237</th>\n",
       "      <td>Cindy Chapman</td>\n",
       "      <td>New York</td>\n",
       "      <td>45.576</td>\n",
       "      <td>Low</td>\n",
       "      <td>13</td>\n",
       "      <td>45.576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5159</th>\n",
       "      <td>Andrew Gjertsen</td>\n",
       "      <td>Arizona</td>\n",
       "      <td>105.584</td>\n",
       "      <td>Medium</td>\n",
       "      <td>15</td>\n",
       "      <td>105.584</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           Customer Name          State    Sales Sales Price Category  \\\n",
       "7892       Trudy Schmidt       Arkansas  106.320               Medium   \n",
       "7217       Lena Cacioppo     California   62.496               Medium   \n",
       "9006           Nona Balk      Wisconsin   37.760                  Low   \n",
       "5762  Justin MacKendrick     California   54.920               Medium   \n",
       "7470           Ann Chong   Pennsylvania    3.576                  Low   \n",
       "4434      Arthur Prichep       Kentucky  842.940                 High   \n",
       "4712           Ed Jacobs          Texas   36.288                  Low   \n",
       "8003       Anna Andreadi  Massachusetts  271.900                 High   \n",
       "3237       Cindy Chapman       New York   45.576                  Low   \n",
       "5159     Andrew Gjertsen        Arizona  105.584               Medium   \n",
       "\n",
       "      Customer Name Length  Discounted Price  \n",
       "7892                    13           106.320  \n",
       "7217                    13            62.496  \n",
       "9006                     9            37.760  \n",
       "5762                    18            54.920  \n",
       "7470                     9             3.576  \n",
       "4434                    14           716.499  \n",
       "4712                     9            36.288  \n",
       "8003                    13           231.115  \n",
       "3237                    13            45.576  \n",
       "5159                    15           105.584  "
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sales strictly above the threshold are reduced to 85% of their value;\n",
    "# every other row keeps its Sales figure unchanged.\n",
    "DISCOUNT_THRESHOLD = 200\n",
    "DISCOUNT_FACTOR = 0.85\n",
    "\n",
    "# Series.mask replaces values where the condition is True in one vectorized\n",
    "# pass, instead of calling a Python lambda once per row via apply().\n",
    "sales = df_sample['Sales']\n",
    "df_sample['Discounted Price'] = sales.mask(sales > DISCOUNT_THRESHOLD, DISCOUNT_FACTOR * sales)\n",
    "df_sample.head(10)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
